From 38d681c008e8498be0b814f9de3b54229c75df9f Mon Sep 17 00:00:00 2001 From: Daniel Goldstein Date: Thu, 7 Sep 2023 14:16:17 -0400 Subject: [PATCH 1/4] [notebook] Remove the notebook and workshop service --- build.yaml | 103 -- dev-docs/hail-overview.md | 9 +- letsencrypt/subdomains.txt | 2 - notebook/.gitignore | 4 - notebook/Dockerfile | 28 - notebook/Dockerfile.nginx | 13 - notebook/MANIFEST.in | 2 - notebook/Makefile | 26 - notebook/deployment.yaml | 232 --- notebook/images/hail/Dockerfile | 26 - .../resources/Hail-Workshop-Notebook.ipynb | 1465 ---------------- notebook/nginx.conf | 190 -- notebook/notebook/__init__.py | 3 - notebook/notebook/__main__.py | 7 - notebook/notebook/notebook.py | 780 --------- notebook/notebook/styles/pages/notebook.scss | 91 - notebook/notebook/templates/index.html | 14 - notebook/notebook/templates/login.html | 14 - .../notebook/templates/notebook-form.html | 4 - .../notebook/templates/notebook-state.html | 32 - notebook/notebook/templates/notebook.html | 14 - .../notebook/templates/workshop-admin.html | 79 - .../notebook/templates/workshop/header.html | 31 - .../notebook/templates/workshop/index.html | 27 - .../notebook/templates/workshop/login.html | 26 - .../templates/workshop/resources.html | 30 - notebook/proxy.conf | 6 - notebook/scale-test.py | 123 -- notebook/setup.py | 12 - notebook/sql/initial.sql | 24 - notebook/test-playbook.txt | 79 - notebook/worker/Dockerfile | 24 - .../resources/Hail-Workshop-Notebook.ipynb | 1531 ----------------- pyproject.toml | 1 - tls/config.yaml | 10 - web_common/web_common/styles/main.scss | 2 +- web_common/web_common/templates/header.html | 15 - web_common/web_common/templates/layout.html | 4 - web_common/web_common/web_common.py | 2 - website/website/templates/nav-top.html | 3 - 40 files changed, 2 insertions(+), 5086 deletions(-) delete mode 100644 notebook/.gitignore delete mode 100644 notebook/Dockerfile delete mode 100644 notebook/Dockerfile.nginx delete mode 100644 notebook/MANIFEST.in delete mode 100644 notebook/Makefile delete mode 100644 notebook/deployment.yaml delete mode 100644 notebook/images/hail/Dockerfile delete mode 100644 notebook/images/hail/resources/Hail-Workshop-Notebook.ipynb delete mode 100644 notebook/nginx.conf delete mode 100644 notebook/notebook/__init__.py delete mode 100644 notebook/notebook/__main__.py delete mode 100644 notebook/notebook/notebook.py delete mode 100644 notebook/notebook/styles/pages/notebook.scss delete mode 100644 notebook/notebook/templates/index.html delete mode 100644 notebook/notebook/templates/login.html delete mode 100644 notebook/notebook/templates/notebook-form.html delete mode 100644 notebook/notebook/templates/notebook-state.html delete mode 100644 notebook/notebook/templates/notebook.html delete mode 100644 notebook/notebook/templates/workshop-admin.html delete mode 100644 notebook/notebook/templates/workshop/header.html delete mode 100644 notebook/notebook/templates/workshop/index.html delete mode 100644 notebook/notebook/templates/workshop/login.html delete mode 100644 notebook/notebook/templates/workshop/resources.html delete mode 100644 notebook/proxy.conf delete mode 100644 notebook/scale-test.py delete mode 100644 notebook/setup.py delete mode 100644 notebook/sql/initial.sql delete mode 100644 notebook/test-playbook.txt delete mode 100644 notebook/worker/Dockerfile delete mode 100644 notebook/worker/resources/Hail-Workshop-Notebook.ipynb diff --git a/build.yaml b/build.yaml index 106f43989fd..09fa9d2086c 100644 --- a/build.yaml +++ b/build.yaml @@ -944,68 +944,6 @@ steps: - create_test_gsa_keys - build_hail_debug_jar_and_wheel - build_hail_test_artifacts - - kind: buildImage2 - name: notebook_image - dockerFile: /io/repo/notebook/Dockerfile - contextPath: /io/repo/ - publishAs: notebook - inputs: - - from: /repo/notebook - to: /io/repo/notebook - - from: /repo/hail/python/setup-hailtop.py - to: /io/repo/hail/python/setup-hailtop.py - - from: /repo/hail/python/MANIFEST.in - to: /io/repo/hail/python/MANIFEST.in - - from: /repo/hail/python/hailtop - to: /io/repo/hail/python/hailtop - - from: /hail_version - to: /io/repo/hail/python/hailtop/hail_version - - from: /repo/gear - to: /io/repo/gear - - from: /repo/web_common - to: /io/repo/web_common - dependsOn: - - hail_ubuntu_image - - merge_code - - kind: runImage - name: render_notebook_nginx_conf - image: - valueFrom: ci_utils_image.image - script: | - set -ex - cd /io/repo/notebook - {% if deploy %} - DEPLOY=true - {% else %} - DEPLOY=false - {% endif %} - python3 ../ci/jinja2_render.py '{"deploy": '${DEPLOY}', "default_ns": {"name": "{{ default_ns.name }}"}}' nginx.conf nginx.conf.out - inputs: - - from: /repo/ci/jinja2_render.py - to: /io/repo/ci/jinja2_render.py - - from: /repo/notebook - to: /io/repo/notebook - outputs: - - from: /io/repo/notebook/nginx.conf.out - to: /notebook/nginx.conf.out - dependsOn: - - default_ns - - ci_utils_image - - merge_code - - kind: buildImage2 - name: notebook_nginx_image - dockerFile: /io/notebook/Dockerfile.nginx - contextPath: /io/notebook - publishAs: notebook_nginx - inputs: - - from: /repo/notebook - to: /io/notebook - - from: /notebook/nginx.conf.out - to: /io/notebook/nginx.conf.out - dependsOn: - - hail_ubuntu_image - - render_notebook_nginx_conf - - merge_code - kind: runImage name: test_hail_python numSplits: 28 @@ -3040,47 +2978,6 @@ steps: inputs: - from: /repo/hail/python/hailtop to: /io/hailtop - - kind: createDatabase2 - name: notebook_database - databaseName: notebook - image: - valueFrom: ci_utils_image.image - migrations: - - name: initial - script: /io/sql/initial.sql - inputs: - - from: /repo/notebook/sql - to: /io/sql - namespace: - valueFrom: default_ns.name - shutdowns: - - kind: Deployment - namespace: - valueFrom: default_ns.name - name: notebook - dependsOn: - - default_ns - - merge_code - - ci_utils_image - - create_test_database_server_config - - deploy_test_db - - kind: deploy - name: deploy_notebook - namespace: - valueFrom: default_ns.name - config: notebook/deployment.yaml - dependsOn: - - default_ns - - create_session_key - - notebook_image - - notebook_nginx_image - - deploy_auth - - notebook_database - - create_certs - wait: - - kind: Service - name: notebook - for: alive - kind: runImage name: cleanup_ci_test_repo resources: diff --git a/dev-docs/hail-overview.md b/dev-docs/hail-overview.md index 659b553d0ed..3a438c788fd 100644 --- a/dev-docs/hail-overview.md +++ b/dev-docs/hail-overview.md @@ -100,7 +100,6 @@ Services (see below for descriptions): * $HAIL/ci * $HAIL/gateway * $HAIL/internal-gateway -* $HAIL/notebook: notebook and workshop services * $HAIL/site Libraries for services: @@ -195,7 +194,7 @@ Some services rely on 3rd party services. Those include: * ci depends on GitHub -* batch, ci and notebook depend on K8s +* batch, ci and auth depend on K8s * batch depends on K8s and GCP @@ -246,12 +245,6 @@ There is a collection of libraries to facilitate service development: connections from the Google Virtual Private Cloud (VPC) network and connections to the services in K8s. -* notebook: The notebook service is a simple Jupyter notebook service - similar to Google Colab. It is available both at - https://notebook.hail.is/ and https://workshop.hail.is/. workshop - is used for running Hail workshops and tutorials. The notebook - service is not currently used. - * site: site implements the main Hail website https://hail.is/ including the landing page and Hail Query and Hail Batch documentation. diff --git a/letsencrypt/subdomains.txt b/letsencrypt/subdomains.txt index 2e3da4d5bfe..5e923b04cb8 100644 --- a/letsencrypt/subdomains.txt +++ b/letsencrypt/subdomains.txt @@ -1,5 +1,4 @@ ci -notebook www batch batch-driver @@ -8,7 +7,6 @@ memory monitoring auth ukbb-rg -workshop grafana prometheus hello diff --git a/notebook/.gitignore b/notebook/.gitignore deleted file mode 100644 index 04f2a0eea21..00000000000 --- a/notebook/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -/deployment.yaml -/notebook-image -/notebook-worker-images -/notebook/static/css diff --git a/notebook/Dockerfile b/notebook/Dockerfile deleted file mode 100644 index 62ebeb605d6..00000000000 --- a/notebook/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -FROM {{ hail_ubuntu_image.image }} - -COPY hail/python/hailtop/pinned-requirements.txt hailtop-requirements.txt -COPY gear/pinned-requirements.txt gear-requirements.txt -COPY web_common/pinned-requirements.txt web_common-requirements.txt -RUN hail-pip-install \ - -r hailtop-requirements.txt \ - -r gear-requirements.txt \ - -r web_common-requirements.txt - -COPY hail/python/setup-hailtop.py /hailtop/setup.py -COPY hail/python/MANIFEST.in /hailtop/MANIFEST.in -COPY hail/python/hailtop /hailtop/hailtop/ - -COPY gear/setup.py /gear/setup.py -COPY gear/gear /gear/gear/ - -COPY web_common/setup.py web_common/MANIFEST.in /web_common/ -COPY web_common/web_common /web_common/web_common/ - -COPY notebook/setup.py notebook/MANIFEST.in /notebook/ -COPY notebook/notebook /notebook/notebook/ - -RUN hail-pip-install /hailtop /gear /web_common /notebook - -EXPOSE 5000 - -CMD ["python3", "-m", "notebook"] diff --git a/notebook/Dockerfile.nginx b/notebook/Dockerfile.nginx deleted file mode 100644 index 009c0ab1657..00000000000 --- a/notebook/Dockerfile.nginx +++ /dev/null @@ -1,13 +0,0 @@ -FROM {{ hail_ubuntu_image.image }} - -RUN hail-apt-get-install nginx - -RUN rm -f /etc/nginx/sites-enabled/default && \ - rm -f /etc/nginx/nginx.conf -ADD nginx.conf.out /etc/nginx/nginx.conf -ADD proxy.conf /etc/nginx/ - -RUN ln -sf /dev/stdout /var/log/nginx/access.log -RUN ln -sf /dev/stderr /var/log/nginx/error.log - -CMD ["nginx", "-g", "daemon off;"] diff --git a/notebook/MANIFEST.in b/notebook/MANIFEST.in deleted file mode 100644 index 848b9c4cc76..00000000000 --- a/notebook/MANIFEST.in +++ /dev/null @@ -1,2 +0,0 @@ -recursive-include notebook/templates * -recursive-include notebook/styles * diff --git a/notebook/Makefile b/notebook/Makefile deleted file mode 100644 index 53a9feb9448..00000000000 --- a/notebook/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -include ../config.mk - -NOTEBOOK_NGINX_IMAGE := $(DOCKER_PREFIX)/notebook_nginx:$(TOKEN) - -.PHONY: build-notebook -build-notebook: - $(MAKE) -C .. notebook-image - -.PHONY: build-nginx -build-nginx: - $(MAKE) -C .. hail-ubuntu-image - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../hail-ubuntu-image)'"}}' Dockerfile.nginx Dockerfile.nginx.out - python3 ../ci/jinja2_render.py '{"deploy": $(DEPLOY), "default_ns": {"name": "$(NAMESPACE)"}}' nginx.conf nginx.conf.out - ../docker-build.sh . Dockerfile.nginx.out $(NOTEBOOK_NGINX_IMAGE) - -.PHONY: build -build: build-notebook build-nginx - -JINJA_ENVIRONMENT = '{"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"notebook_image":{"image":"$(shell cat ../notebook-image)"},"notebook_nginx_image":{"image":"$(NOTEBOOK_NGINX_IMAGE)"},"default_ns":{"name":"$(NAMESPACE)"},"notebook_database":{"user_secret_name":"sql-notebook-user-config"},"scope":"$(SCOPE)"}' - -.PHONY: deploy -deploy: build - ! [ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - E=$(JINJA_ENVIRONMENT) && \ - python3 ../ci/jinja2_render.py $$E deployment.yaml deployment.yaml.out && \ - kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/notebook/deployment.yaml b/notebook/deployment.yaml deleted file mode 100644 index aac544d7c44..00000000000 --- a/notebook/deployment.yaml +++ /dev/null @@ -1,232 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: notebook ---- -kind: Role -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: notebook -rules: - - apiGroups: [""] - resources: ["pods"] - verbs: ["*"] ---- -kind: RoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: notebook-notebook -subjects: - - kind: ServiceAccount - name: notebook -roleRef: - kind: Role - name: notebook - apiGroup: "" ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: notebook - labels: - app: notebook - hail.is/sha: "{{ code.sha }}" -spec: - selector: - matchLabels: - app: notebook -{% if scope == "deploy" %} - replicas: 3 -{% elif scope == "test" or scope == "dev" %} - replicas: 1 -{% else %} -!!! unexpected scope {{ scope }} !!! -{% endif %} - template: - metadata: - labels: - app: notebook - hail.is/sha: "{{ code.sha }}" - grafanak8sapp: "true" - spec: - hostAliases: - - ip: "127.0.0.1" - hostnames: - - "notebook.local" - - "workshop.local" - serviceAccountName: notebook -{% if deploy %} - priorityClassName: production -{% endif %} - nodeSelector: - preemptible: "true" - tolerations: - - key: preemptible - value: "true" - - key: "kubernetes.azure.com/scalesetpriority" - value: "spot" - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: "app" - operator: In - values: - - notebook - topologyKey: "kubernetes.io/hostname" - containers: - - name: notebook - image: "{{ notebook_image.image }}" - resources: - requests: - cpu: "5m" - memory: "20M" - limits: - cpu: "1" - memory: "200M" - env: - - name: HAIL_DEPLOY_CONFIG_FILE - value: /deploy-config/deploy-config.json - - name: HAIL_DOMAIN - valueFrom: - secretKeyRef: - name: global-config - key: domain - - name: HAIL_NOTEBOOK_NAMESPACE - value: "{{ default_ns.name }}" - - name: HAIL_NOTEBOOK_WORKER_IMAGE - value: gcr.io/hail-vdc/hail-jupyter:latest - ports: - - containerPort: 5000 - volumeMounts: - - name: deploy-config - mountPath: /deploy-config - readOnly: true - - name: global-config - mountPath: /global-config - readOnly: true - - name: session-secret-key - mountPath: /session-secret-key - readOnly: true - - name: sql-config - mountPath: /sql-config - readOnly: true - - name: ssl-config-notebook-python - mountPath: /ssl-config - readOnly: true - - name: nginx - image: {{ notebook_nginx_image.image }} - resources: - requests: - cpu: "5m" - memory: "20M" - limits: - cpu: "1" - memory: "1G" - ports: - - containerPort: 443 - volumeMounts: - - name: ssl-config-notebook - mountPath: /ssl-config - readOnly: true - readinessProbe: - tcpSocket: - port: 443 - initialDelaySeconds: 5 - periodSeconds: 5 - volumes: - - name: deploy-config - secret: - optional: false - secretName: deploy-config - - name: global-config - secret: - optional: false - secretName: global-config - - name: session-secret-key - secret: - optional: false - secretName: session-secret-key - - name: sql-config - secret: - optional: false - secretName: "{{ notebook_database.user_secret_name }}" - - name: ssl-config-notebook - secret: - optional: false - secretName: ssl-config-notebook - - name: ssl-config-notebook-python - secret: - optional: false - secretName: ssl-config-notebook-python ---- -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: notebook -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: notebook -{% if scope == "deploy" %} - minReplicas: 3 - maxReplicas: 10 -{% elif scope == "test" or scope == "dev" %} - minReplicas: 1 - maxReplicas: 3 -{% else %} -!!! unexpected scope {{ scope }} !!! -{% endif %} - metrics: - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: 2500 ---- -apiVersion: policy/v1 -kind: PodDisruptionBudget -metadata: - name: notebook -spec: -{% if scope == "deploy" %} - minAvailable: 2 -{% elif scope == "test" or scope == "dev" %} - minAvailable: 0 -{% else %} -!!! unexpected scope {{ scope }} !!! -{% endif %} - selector: - matchLabels: - app: notebook ---- -apiVersion: v1 -kind: Service -metadata: - name: notebook - labels: - app: notebook -spec: - ports: - - port: 443 - protocol: TCP - targetPort: 443 - selector: - app: notebook ---- -apiVersion: v1 -kind: Service -metadata: - name: workshop - labels: - app: workshop -spec: - ports: - - port: 443 - protocol: TCP - targetPort: 443 - selector: - app: notebook diff --git a/notebook/images/hail/Dockerfile b/notebook/images/hail/Dockerfile deleted file mode 100644 index 635aa7bce79..00000000000 --- a/notebook/images/hail/Dockerfile +++ /dev/null @@ -1,26 +0,0 @@ -FROM jupyter/scipy-notebook:c094bb7219f9 -MAINTAINER Hail Team - -USER root -RUN apt-get update && apt-get install -y \ - openjdk-8-jre-headless \ - && rm -rf /var/lib/apt/lists/* -USER jovyan - -RUN pip install --upgrade --no-cache-dir \ - jupyter \ - jupyter-spark \ - "tornado<6" \ - hail==0.2.54 \ - jupyter_contrib_nbextensions \ - && \ - pip check && \ - jupyter serverextension enable --user --py jupyter_spark && \ - jupyter nbextension install --user --py jupyter_spark && \ - jupyter contrib nbextension install --user && \ - jupyter nbextension enable --user --py jupyter_spark && \ - jupyter nbextension enable --user --py widgetsnbextension && \ - jupyter nbextension enable --user collapsible_headings/main && \ - jupyter nbextension enable --user move_selected_cells/main - -COPY ./resources/ /home/jovyan diff --git a/notebook/images/hail/resources/Hail-Workshop-Notebook.ipynb b/notebook/images/hail/resources/Hail-Workshop-Notebook.ipynb deleted file mode 100644 index dc85349b68b..00000000000 --- a/notebook/images/hail/resources/Hail-Workshop-Notebook.ipynb +++ /dev/null @@ -1,1465 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Hail workshop\n", - "\n", - "This notebook will introduce the following concepts:\n", - "\n", - " - Using Jupyter notebooks effectively\n", - " - Loading genetic data into Hail\n", - " - General-purpose data exploration functionality\n", - " - Plotting functionality\n", - " - Quality control of sequencing data\n", - " - Running a Genome-Wide Association Study (GWAS)\n", - " - Rare variant burden tests\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Hail on Jupyter\n", - "\n", - "From https://jupyter.org: \n", - "\n", - "\"The Jupyter Notebook is an open-source web application that allows you to create and share documents that contain live code, equations, visualizations and narrative text. Uses include: data cleaning and transformation, numerical simulation, statistical modeling, data visualization, machine learning, and much more.\"\n", - "\n", - "In the last year, the Jupyter development team [released Jupyter Lab](https://blog.jupyter.org/jupyterlab-is-ready-for-users-5a6f039b8906), an integrated environment for data, code, and visualizations. If you've used R Studio, this is the closest thing that works in Python (and many other languages!)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Why notebooks?\n", - "\n", - "Part of what we think is so exciting about Hail is that it has coincided with a larger shift in the data science community.\n", - "\n", - "Three years ago, most computational biologists at Broad analyzed genetic data using command-line tools, and took advantage of research compute clusters by explicitly using scheduling frameworks like LSF or Sun Grid Engine.\n", - "\n", - "Now, they have the option to use Hail in interactive Python notebooks backed by thousands of cores on public compute clouds like [Google Cloud](https://cloud.google.com/), [Amazon Web Services](https://aws.amazon.com/), or [Microsoft Azure](https://azure.microsoft.com/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Using Jupyter" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Running cells\n", - "\n", - "Evaluate cells using `SHIFT + ENTER`. Select the next cell and run it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Hello, world')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Modes\n", - "\n", - "Jupyter has two modes, a **navigation mode** and an **editor mode**.\n", - "\n", - "#### Navigation mode:\n", - "\n", - " - BLUE cell borders\n", - " - `UP` / `DOWN` move between cells\n", - " - `ENTER` while a cell is selected will move to **editing mode**.\n", - " - Many letters are keyboard shortcuts! This is a common trap.\n", - " \n", - "#### Editor mode:\n", - "\n", - " - GREEN cell borders\n", - " - `UP` / `DOWN`/ move within cells before moving between cells.\n", - " - `ESC` will return to **navigation mode**.\n", - " - `SHIFT + ENTER` will evaluate a cell and return to **navigation mode**." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Cell types\n", - "\n", - "There are several types of cells in Jupyter notebooks. The two you will see here are **Markdown** (text) and **Code**." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is a code cell\n", - "my_variable = 5" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**This is a markdown cell**, so even if something looks like code (as below), it won't get executed!\n", - "\n", - "my_variable += 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(my_variable)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common gotcha: a code cell turns into markdown\n", - "\n", - "This can happen if you are in **navigation mode** and hit the keyboard shortcut `m` while selecting a code cell.\n", - "\n", - "You can either navigate to `Cell > Cell Type > Code` through the top menu, or use the keyboard shortcut `y` to turn it back to code." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Tips and tricks\n", - "\n", - "Keyboard shortcuts:\n", - "\n", - " - `SHIFT + ENTER` to evaluate a cell\n", - " - `ESC` to return to navigation mode\n", - " - `y` to turn a markdown cell into code\n", - " - `m` to turn a code cell into markdown\n", - " - `a` to add a new cell **above** the currently selected cell\n", - " - `b` to add a new cell **below** the currently selected cell\n", - " - `d, d` (repeated) to delete the currently selected cell\n", - " - `TAB` to activate code completion\n", - " \n", - "To try this out, create a new cell below this one using `b`, and print `my_variable` by starting with `print(my` and pressing `TAB`!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common gotcha: the state of your code seems wrong\n", - "\n", - "Jupyter makes it easy to get yourself into trouble by executing cells out-of-order, or multiple times.\n", - "\n", - "For example, if I declare `x`:\n", - "\n", - "```\n", - "x = 5\n", - "```\n", - "\n", - "Then have a cell that reads:\n", - "\n", - "```\n", - "x += 1\n", - "```\n", - "\n", - "And finally:\n", - "\n", - "```\n", - "print(x)\n", - "```\n", - "\n", - "If you execute these cells in order and once, I'll see the notebook print `6`. However, there is **nothing stopping you** from executing the middle cell ten times, printing `16`!\n", - "\n", - "### Solution\n", - "\n", - "If you get yourself into trouble into this way, the solution is to clear the kernel (Python process) and start again from the top.\n", - "\n", - "First, `Kernel > Restart & Clear Output > (accept dialog)`.\n", - "\n", - "Second, `Cell > Run all above`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Set up our Python environment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In addition to Hail, we import a few methods from the [bokeh](https://bokeh.pydata.org/en/latest/) plotting library. We'll see examples soon!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import hail as hl\n", - "from bokeh.io import output_notebook, show" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we initialize Hail and set up Bokeh to display inline in the notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hl.init()\n", - "output_notebook()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Download public 1000 Genomes data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The workshop materials are designed to work on a small (~20MB) downsampled chunk of the public 1000 Genomes dataset.\n", - "\n", - "You can run these same functions on your computer or on the cloud!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hl.utils.get_1kg('data/')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is possible to call command-line utilities from Jupyter by prefixing a line with a `!`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! ls -1 data/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 1: Explore genetic data with Hail" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import data from VCF\n", - "\n", - "The [Variant Call Format (VCF)](https://en.wikipedia.org/wiki/Variant_Call_Format) is a common file format for representing genetic data collected on multiple individuals (samples).\n", - "\n", - "Hail's [import_vcf](https://hail.is/docs/0.2/methods/impex.html#hail.methods.import_vcf) function can read this format.\n", - "\n", - "However, VCF is a text format that is easy for humans but very bad for computers. The first thing we do is `write` to a Hail native file format, which is much faster!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hl.import_vcf('data/1kg.vcf.bgz').write('data/1kg.mt', overwrite=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read 1KG into Hail\n", - "\n", - "We represent genetic data as a Hail [MatrixTable](https://hail.is/docs/0.2/overview/matrix_table.html), and name our variable `mt` to indicate this." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = hl.read_matrix_table('data/1kg.mt')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### What is a `MatrixTable`?\n", - "\n", - "Let's describe it!\n", - "\n", - "The `describe` method prints the **schema**, that is, the fields in the dataset and their types.\n", - "\n", - "You can see:\n", - " - **numeric** types:\n", - " - integers (`int32`, `int64`), e.g. `5`\n", - " - floating point numbers (`float32`, `float64`), e.g. `5.5` or `3e-8`\n", - " - **strings** (`str`), e.g. `\"Foo\"`\n", - " - **boolean** values (`bool`) e.g. `True`\n", - " - **collections**:\n", - " - arrays (`array`), e.g. `[1,1,2,3]`\n", - " - sets (`set`), e.g. `{1,3}`\n", - " - dictionaries (`dict`), e.g. `{'Foo': 5, 'Bar': 10}`\n", - " - **genetic data types**:\n", - " - loci (`locus`), e.g. `[GRCh37] 1:10000` or `[GRCh38] chr1:10024`\n", - " - genotype calls (`call`), e.g. `0/2` or `1|0`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `count`\n", - "\n", - "`MatrixTable.count` returns a tuple with the number of rows (variants) and number of columns (samples)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.count()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `show`\n", - "\n", - "There is no `mt.show()` method, but you can show individual fields like the sample ID (`s`), or the locus (`locus`)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.s.show(5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.locus.show(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exercise: show other fields\n", - "\n", - "You can see the names of fields above. `show()` the first few values for a few of them, making sure to include at least one **row field** and **at least one entry field**. Capitalization is important.\n", - "\n", - "To print fields inside the `info` structure, you must add another dot, e.g. `mt.info.AN`.\n", - "\n", - "What do you notice being printed alongside some of the fields?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Hail has functions built for genetics\n", - "\n", - "For example, `hl.summarize_variants` prints useful statistics about the genetic variants in the dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hl.summarize_variants(mt)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Most of Hail's functionality is totally general-purpose!\n", - "\n", - "Functions like `summarize_variants` are built out of Hail's general-purpose data manipulation functionality. We can use Hail to ask arbitrary questions about the data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "mt.aggregate_rows(hl.agg.count_where(mt.alleles == ['A', 'T']))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or if we had travel data:\n", - "\n", - "```\n", - "data.aggregate(\n", - " hl.agg.count_where(data.departure_city == 'Boston')\n", - ")\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `counter` aggregator makes it possible to see distributions of categorical data, like alleles:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "snp_counts = mt.aggregate_rows(\n", - " hl.array(hl.agg.counter(mt.alleles)))\n", - "snp_counts" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By sorting the result in Python, we can recover an interesting bit of biology..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sorted(snp_counts,\n", - " key=lambda x: x[1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Question: What is interesting about this distribution?\n", - "\n", - "### Question: Why do the counts come in pairs?\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 2: Annotation and quality control" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Integrate sample information\n", - "\n", - "We're building toward a genome-wide association test in part 3, but we don't just need genetic data to do a GWAS -- we also need phenotype data! Luckily, our `hl.utils.get_1kg` function also downloaded some simulated phenotype data.\n", - "\n", - "This is a text file:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! head data/1kg_annotations.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can import it as a [Hail Table](https://hail.is/docs/0.2/overview/table.html) with [hl.import_table](https://hail.is/docs/0.2/methods/impex.html?highlight=import_table#hail.methods.import_table).\n", - "\n", - "We call it \"sa\" for \"sample annotations\"." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sa = hl.import_table('data/1kg_annotations.txt', \n", - " impute=True, \n", - " key='Sample')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "While we can see the names and types of fields in the logging messages, we can also `describe` and `show` this table:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sa.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sa.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Add sample metadata into our 1KG `MatrixTable`\n", - "\n", - "It's short and easy:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.annotate_cols(pheno = sa[mt.s])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### What's going on here?\n", - "\n", - "Understanding what's going on here is a bit more difficult. To understand, we need to understand a few pieces:\n", - "\n", - "#### 1. `annotate` methods\n", - "\n", - "In Hail, `annotate` methods refer to **adding new fields**. \n", - "\n", - " - `MatrixTable`'s `annotate_cols` adds new column fields.\n", - " - `MatrixTable`'s `annotate_rows` adds new row fields.\n", - " - `MatrixTable`'s `annotate_entries` adds new entry fields.\n", - " - `Table`'s `annotate` adds new row fields.\n", - "\n", - "In the above cell, we are adding a new column field called \"pheno\". This field should be the values in our table `sa` associated with the sample ID `s` in our `MatrixTable` - that is, this is performing a **join**.\n", - "\n", - "Python uses square brackets to look up values in dictionaries:\n", - "\n", - " d = {'foo': 5, 'bar': 10}\n", - " d['foo']\n", - "\n", - "You should think of this in much the same way - for each column of `mt`, we are looking up the fields in `sa` using the sample ID `s`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exercise: Query some of these column fields using `mt.aggregate_cols`.\n", - "\n", - "Some of the aggregators we used earlier:\n", - " - `hl.agg.counter`\n", - " - `hl.agg.stats`\n", - " - `hl.agg.count_where`\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Sample QC\n", - "\n", - "We'll start with examples of sample QC.\n", - "\n", - "Hail has the function [hl.sample_qc](https://hail.is/docs/0.2/methods/genetics.html#hail.methods.sample_qc) to compute a list of useful statistics about samples from sequencing data.\n", - "\n", - "**Click the link** above to see the documentation, which lists the fields and their descriptions." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = hl.sample_qc(mt)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.sample_qc.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.scatter(x=mt.sample_qc.r_het_hom_var,\n", - " y=mt.sample_qc.call_rate)\n", - "show(p)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exercise: Plot some other fields!\n", - "\n", - "Modify the cell above. Remember `hl.plot.histogram` as well!\n", - "\n", - "If you want to start getting fancy, you can plot more complicated expressions -- the ratio between two fields, for instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Filter columns using generated QC statistics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.filter_cols(mt.sample_qc.dp_stats.mean >= 4)\n", - "mt = mt.filter_cols(mt.sample_qc.call_rate >= 0.97)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Genotype QC\n", - "\n", - "We explored GQ above, and analysts often set thresholds for GQ to filter entries (genotypes). Another useful metric is **allele read balance**.\n", - "\n", - "This value is defined by:\n", - "\n", - "$\\quad AB = \\dfrac{N_{alt}}{{N_{ref} + N_{alt}}}$\n", - "\n", - "Where $N_{ref}$ is the number of reference reads and $N_{alt}$ is the number of alternate reads.\n", - "\n", - "We want " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# call rate before filtering\n", - "mt.aggregate_entries(hl.agg.fraction(hl.is_defined(mt.GT)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ab = mt.AD[1] / hl.sum(mt.AD)\n", - "\n", - "filter_condition_ab = (\n", - " hl.case()\n", - " .when(mt.GT.is_hom_ref(), ab <= 0.1)\n", - " .when(mt.GT.is_het(), (ab >= 0.25) & (ab <= 0.75))\n", - " .default(ab >= 0.9) # hom-var\n", - ")\n", - "\n", - "mt = mt.filter_entries(filter_condition_ab)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# call rate after filtering\n", - "mt.aggregate_entries(hl.agg.fraction(hl.is_defined(mt.GT)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Variant QC\n", - "\n", - "Hail has the function [hl.variant_qc](https://hail.is/docs/0.2/methods/genetics.html#hail.methods.variant_qc) to compute a list of useful statistics about **variants** from sequencing data.\n", - "\n", - "Once again, **Click the link** above to see the documentation!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = hl.variant_qc(mt)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "mt.variant_qc.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.variant_qc.AF.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Remove rare sites:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.filter_rows(hl.min(mt.variant_qc.AF) > 1e-6)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Remove sites far from [Hardy-Weinberg equilbrium](https://en.wikipedia.org/wiki/Hardy%E2%80%93Weinberg_principle):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.filter_rows(mt.variant_qc.p_value_hwe > 0.005)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# final variant and sample count\n", - "mt.count()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 3: GWAS!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A GWAS is an independent association test performed per variant of a genetic dataset. We use the same phenotype and covariates, but test the genotypes for each variant separately. \n", - "\n", - "In Hail, the method we use is [hl.linear_regression_rows](https://hail.is/docs/0.2/methods/stats.html#hail.methods.linear_regression_rows).\n", - "\n", - "We use the phenotype `CaffeineConsumption` as our dependent variable, the number of alternate alleles as our independent variable, and no covariates besides an intercept term (that's the `1.0`)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gwas = hl.linear_regression_rows(y=mt.pheno.CaffeineConsumption, \n", - " x=mt.GT.n_alt_alleles(), \n", - " covariates=[1.0])\n", - "gwas.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Two of the plots that analysts generally produce are a [Manhattan plot](https://en.wikipedia.org/wiki/Manhattan_plot) and a [Q-Q plot](https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot).\n", - "\n", - "We'll start with the Manhattan plot:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.manhattan(gwas.p_value)\n", - "show(p)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.qq(gwas.p_value)\n", - "show(p)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Confounded!\n", - "\n", - "The Q-Q plot indicates **extreme** inflation of p-values.\n", - "\n", - "If you've done a GWAS before, you've probably included a few other covariates -- age, sex, and principal components.\n", - "\n", - "Principal components are a measure of genetic ancestry, and can be used to control for [population stratification](https://en.wikipedia.org/wiki/Population_stratification).\n", - "\n", - "We can compute principal components with Hail:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_eigenvalues, pca_scores, pca_loadings = hl.hwe_normalized_pca(mt.GT, compute_loadings=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The **eigenvalues** reflect the amount of variance explained by each principal component:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_eigenvalues" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The **scores** are the principal components themselves, computed per sample." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_scores.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_scores.scores[0].show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The **loadings** are the contributions to each component for each variant." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_loadings.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can **annotate** the principal components back onto `mt`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.annotate_cols(pca = pca_scores[mt.s])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Principal components measure ancestry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "p = hl.plot.scatter(mt.pca.scores[0], \n", - " mt.pca.scores[1],\n", - " label=mt.pheno.SuperPopulation)\n", - "show(p)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Question: Does your plot match your neighbors'?\n", - "\n", - "If not, how is it different?\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Control confounders and run another GWAS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gwas = hl.linear_regression_rows(\n", - " y=mt.pheno.CaffeineConsumption, \n", - " x=mt.GT.n_alt_alleles(),\n", - " covariates=[1.0, mt.pheno.isFemale, mt.pca.scores[0], mt.pca.scores[1], mt.pca.scores[2]])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.qq(gwas.p_value)\n", - "show(p)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.manhattan(gwas.p_value)\n", - "show(p)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 4: Burden tests" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "GWAS is a great tool for finding associations between **common variants** and disease, but a GWAS can't hope to find associations between rare variants and disease. Even if we have sequencing data for 1,000,000 people, we won't have the statistical power to link a mutation found in only a few people to any disease.\n", - "\n", - "But rare variation has lots of information - especially because statistical genetic theory dictates that rarer variants have, on average, stronger effects on disease per allele.\n", - "\n", - "One possible strategy is to **group together rare variants with similar predicted consequence**. For example, we can group all variants that are predicted to knock out the function of each gene and test the variants for each gene as a group.\n", - "\n", - "We will be running a burden test on our common variant dataset to demonstrate the technical side, but we shouldn't hope to find anything here -- especially because we've only got 10,000 variants!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import gene data\n", - "\n", - "We start by importing gene names and coordinates." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gene_ht = hl.import_table('data/ensembl_gene_annotations.txt', impute=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gene_ht.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gene_ht.count()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create an interval key" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gene_ht = gene_ht.transmute(interval = hl.locus_interval(gene_ht['Chromosome'],\n", - " gene_ht['Gene start'],\n", - " gene_ht['Gene end'], \n", - " reference_genome='GRCh37'))\n", - "gene_ht = gene_ht.key_by('interval')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Annotate variants using these intervals" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.annotate_rows(gene_info = gene_ht[mt.locus])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.gene_info.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Aggregate genotypes per gene\n", - "\n", - "There is no `hl.burden_test` function -- instead, a burden test is the composition of two modular pieces of Hail functionality:\n", - "\n", - " - `group_rows_by / aggregate`\n", - " - `hl.linear_regression_rows`.\n", - " \n", - "While this might be a few more lines of code to write than `hl.burden_test`, it means that you can flexibly specify the genotype aggregation however you like. Using other tools , you may have a few ways to aggregate, but if you want to do something different you are out of luck!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "burden_mt = (\n", - " mt\n", - " .group_rows_by(gene = mt.gene_info['Gene name'])\n", - " .aggregate(n_variants = hl.agg.count_where(mt.GT.n_alt_alleles() > 0))\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "burden_mt.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### What is `burden_mt`?\n", - "\n", - "It is a **gene-by-sample** matrix (compare to `mt`, a **variant-by-sample** matrix).\n", - "\n", - "It has one row field, the `gene`.\n", - "\n", - "It has one entry field, `n_variants`.\n", - "\n", - "It has all the column fields from `mt`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run linear regression per gene\n", - "\n", - "This should look familiar!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "burden_results = hl.linear_regression_rows(\n", - " y=burden_mt.pheno.CaffeineConsumption, \n", - " x=burden_mt.n_variants,\n", - " covariates=[1.0, \n", - " burden_mt.pheno.isFemale, \n", - " burden_mt.pca.scores[0], \n", - " burden_mt.pca.scores[1], \n", - " burden_mt.pca.scores[2]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Sorry, no `hl.plot.manhattan` for genes!\n", - "\n", - "Instead, we can sort by p-value and print:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "burden_results.order_by(burden_results.p_value).show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exercise: Where along the genome can we find the top gene?" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 5: Whirlwind tour\n", - "\n", - "You've seen just a very small fraction of the functionality in Hail. Here are a few examples of things that a large library makes easy." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Find related individuals using IBD (identity by descent)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ht = hl.identity_by_descent(mt).cache()\n", - "\n", - "ht.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ht.filter(ht.ibd.PI_HAT > 0.20).show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Infer sex from X-chromosome data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ht = hl.impute_sex(mt.GT).cache()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ht.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Simulate genetic data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sim_mt = hl.balding_nichols_model(n_populations=3,\n", - " n_samples=1000, \n", - " n_variants=1000)\n", - "\n", - "# simulate variant effects using spike-and-slab model\n", - "spike_prob = 0.2\n", - "sim_mt = sim_mt.annotate_rows(beta = hl.rand_bool(spike_prob) * hl.rand_norm(0, 1))\n", - "\n", - "# compute risk scores from betas\n", - "sim_mt = sim_mt.annotate_cols(risk = hl.agg.sum(sim_mt.beta * sim_mt.GT.n_alt_alleles()) / sim_mt.count_rows())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "show(hl.plot.histogram(sim_mt.risk))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# The case for modularity\n", - "\n", - "Most of the \"black-box\" methods we've used above (`impute_sex`, `variant_qc`, `sample_qc`, etc) are actually implemented on top of Hail's Python interface using `Table` and `MatrixTable` operations, expressions, aggregations, and linear algebra!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - }, - "toc": { - "base_numbering": 1, - "nav_menu": { - "height": "242px", - "width": "283px" - }, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/notebook/nginx.conf b/notebook/nginx.conf deleted file mode 100644 index df0dcc46ec7..00000000000 --- a/notebook/nginx.conf +++ /dev/null @@ -1,190 +0,0 @@ -worker_processes auto; -pid /run/nginx.pid; -include /etc/nginx/modules-enabled/*.conf; - -events { - worker_connections 768; -} - -http { - - sendfile on; - tcp_nopush on; - tcp_nodelay on; - keepalive_timeout 65; - types_hash_max_size 2048; - server_names_hash_bucket_size 128; - - include /etc/nginx/mime.types; - default_type application/octet-stream; - - ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE - ssl_prefer_server_ciphers on; - - log_format json-log escape=json '{' - '"message":"$scheme $request done in ${request_time}s: $status",' - '"response_status":$status,' - '"request_duration":$request_time,' - '"remote_address":"$remote_addr",' - '"x_real_ip":"$http_x_real_ip",' - '"request_start_time":"$time_local",' - '"body_bytes_sent":"$body_bytes_sent",' - '"http_referer":"$http_referer",' - '"http_user_agent":"$http_user_agent"' - '}'; - - access_log /var/log/nginx/access.log json-log; - error_log /var/log/nginx/error.log; - - gzip on; - - include /ssl-config/ssl-config-http.conf; - map $http_x_forwarded_proto $updated_scheme { - default $http_x_forwarded_proto; - '' $scheme; - } - map $http_x_forwarded_host $updated_host { - default $http_x_forwarded_host; - '' $http_host; - } - map $http_upgrade $connection_upgrade { - default upgrade; - '' close; - } - - server { - server_name notebook.*; - - # needed to correctly handle error_page with internal handles - recursive_error_pages on; - - location = /auth { - internal; -{% if deploy %} - proxy_pass https://notebook.local:5000/auth/$notebook_token; -{% else %} - proxy_pass https://notebook.local:5000/{{ default_ns.name }}/notebook/auth/$notebook_token; -{% endif %} - } - -{% if deploy %} - location ~ /instance/([^/]+)/(.*) { -{% else %} - location ~ {{ default_ns.name }}/notebook/instance/([^/]+)/(.*) { -{% endif %} - set $notebook_token $1; - auth_request /auth; - auth_request_set $auth_pod_ip $upstream_http_pod_ip; - - proxy_pass http://$auth_pod_ip$request_uri; - - include /etc/nginx/proxy.conf; - proxy_http_version 1.1; - proxy_redirect off; - proxy_buffering off; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - proxy_read_timeout 86400; - - proxy_connect_timeout 5s; - - proxy_intercept_errors on; - error_page 401 403 502 504 = @error; - } - - location @error { -{% if deploy %} - return 302 $updated_scheme://$updated_host/error; -{% else %} - return 302 $updated_scheme://$updated_host/{{ default_ns.name }}/notebook/error; -{% endif %} - } - - location / { - proxy_pass https://notebook.local:5000; - - # don't set Host, notebook dispatches off domain - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Host $http_host; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header X-Real-IP $http_x_real_ip; - proxy_http_version 1.1; - proxy_redirect off; - proxy_buffering off; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - proxy_read_timeout 86400; - } - - listen 443 ssl; - listen [::]:443 ssl; - } - - server { - server_name workshop.*; - - # needed to correctly handle error_page with internal handles - recursive_error_pages on; - - location = /auth { - internal; -{% if deploy %} - proxy_pass https://workshop.local:5000/auth/$notebook_token; -{% else %} - proxy_pass https://workshop.local:5000/{{ default_ns.name }}/workshop/auth/$notebook_token; -{% endif %} - } - -{% if deploy %} - location ~ /instance/([^/]+)/(.*) { -{% else %} - location ~ {{ default_ns.name }}/workshop/instance/([^/]+)/(.*) { -{% endif %} - set $notebook_token $1; - auth_request /auth; - auth_request_set $auth_pod_ip $upstream_http_pod_ip; - - proxy_pass http://$auth_pod_ip$request_uri; - - include /etc/nginx/proxy.conf; - proxy_http_version 1.1; - proxy_redirect off; - proxy_buffering off; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - proxy_read_timeout 86400; - - proxy_connect_timeout 5s; - - proxy_intercept_errors on; - error_page 401 403 502 504 = @error; - } - - location @error { -{% if deploy %} - return 302 $updated_scheme://$updated_host/error; -{% else %} - return 302 $updated_scheme://$updated_host/{{ default_ns.name }}/workshop/error; -{% endif %} - } - - location / { - proxy_pass https://workshop.local:5000; - - # don't set Host, notebook dispatches off domain - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Host $http_host; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_set_header X-Real-IP $http_x_real_ip; - proxy_http_version 1.1; - proxy_redirect off; - proxy_buffering off; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - proxy_read_timeout 86400; - } - - listen 443 ssl; - listen [::]:443 ssl; - } -} diff --git a/notebook/notebook/__init__.py b/notebook/notebook/__init__.py deleted file mode 100644 index 4070fbe31c9..00000000000 --- a/notebook/notebook/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .notebook import run - -__all__ = ['run'] diff --git a/notebook/notebook/__main__.py b/notebook/notebook/__main__.py deleted file mode 100644 index 092752b9ba7..00000000000 --- a/notebook/notebook/__main__.py +++ /dev/null @@ -1,7 +0,0 @@ -from hailtop.hail_logging import configure_logging - -configure_logging() - -from .notebook import run # noqa: E402 pylint: disable=wrong-import-position - -run() diff --git a/notebook/notebook/notebook.py b/notebook/notebook/notebook.py deleted file mode 100644 index 0f7d3cd5f3a..00000000000 --- a/notebook/notebook/notebook.py +++ /dev/null @@ -1,780 +0,0 @@ -import asyncio -import logging -import os -import secrets -from functools import wraps - -import aiohttp -import aiohttp_session -import aiohttp_session.cookie_storage -import kubernetes_asyncio.client -import kubernetes_asyncio.client.rest -import kubernetes_asyncio.config -import pymysql -from aiohttp import web -from prometheus_async.aio.web import server_stats # type: ignore - -from gear import AuthClient, check_csrf_token, create_database_pool, monitor_endpoints_middleware, setup_aiohttp_session -from gear.cloud_config import get_global_config -from hailtop import httpx -from hailtop.config import get_deploy_config -from hailtop.hail_logging import AccessLogger -from hailtop.tls import internal_server_ssl_context -from web_common import render_template, sass_compile, set_message, setup_aiohttp_jinja2, setup_common_static_routes - -log = logging.getLogger('notebook') - -NOTEBOOK_NAMESPACE = os.environ['HAIL_NOTEBOOK_NAMESPACE'] - -deploy_config = get_deploy_config() - -routes = web.RouteTableDef() - -auth = AuthClient() - -# Must be int for Kubernetes V1 api timeout_seconds property -KUBERNETES_TIMEOUT_IN_SECONDS = float(os.environ.get('KUBERNETES_TIMEOUT_IN_SECONDS', 5)) - -POD_PORT = 8888 - -DEFAULT_WORKER_IMAGE = os.environ['HAIL_NOTEBOOK_WORKER_IMAGE'] - -log.info(f'KUBERNETES_TIMEOUT_IN_SECONDS {KUBERNETES_TIMEOUT_IN_SECONDS}') - - -async def workshop_userdata_from_web_request(request): - session = await aiohttp_session.get_session(request) - if 'workshop_session' not in session: - return None - workshop_session = session['workshop_session'] - - # verify this workshop is active - name = workshop_session['workshop_name'] - token = workshop_session['workshop_token'] - - dbpool = request.app['dbpool'] - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute( - 'SELECT * FROM workshops WHERE name = %s AND token = %s AND active = 1;', (name, token) - ) - workshops = await cursor.fetchall() - - if len(workshops) != 1: - assert len(workshops) == 0 - del session['workshop_session'] - return None - workshop = workshops[0] - - return {'id': workshop_session['id'], 'workshop': workshop} - - -def web_maybe_authenticated_workshop_guest(fun): - @wraps(fun) - async def wrapped(request, *args, **kwargs): - return await fun(request, await workshop_userdata_from_web_request(request), *args, **kwargs) - - return wrapped - - -def web_authenticated_workshop_guest_only(redirect=True): - def wrap(fun): - @web_maybe_authenticated_workshop_guest - @wraps(fun) - async def wrapped(request, userdata, *args, **kwargs): - if not userdata: - if redirect: - raise web.HTTPFound(deploy_config.external_url('workshop', '/login')) - raise web.HTTPUnauthorized() - return await fun(request, userdata, *args, **kwargs) - - return wrapped - - return wrap - - -async def start_pod(k8s, service, userdata, notebook_token, jupyter_token): - service_base_path = deploy_config.base_path(service) - - origin = deploy_config.external_url('workshop', '/').rstrip('/') - - command = [ - 'jupyter', - 'notebook', - f'--NotebookApp.token={jupyter_token}', - f'--NotebookApp.base_url={service_base_path}/instance/{notebook_token}/', - "--ip", - "0.0.0.0", - f"--NotebookApp.allow_origin={origin}", - "--no-browser", - "--allow-root", - ] - - if service == 'notebook': - service_account_name = userdata.get('ksa_name') - - bucket = userdata['bucket_name'] - command.append(f'--GoogleStorageContentManager.default_path="{bucket}"') - - image = DEFAULT_WORKER_IMAGE - - env = [ - kubernetes_asyncio.client.V1EnvVar( - name='HAIL_DEPLOY_CONFIG_FILE', value='/deploy-config/deploy-config.json' - ) - ] - - tokens_secret_name = userdata['tokens_secret_name'] - hail_credentials_secret_name = userdata['hail_credentials_secret_name'] - volumes = [ - kubernetes_asyncio.client.V1Volume( - name='deploy-config', secret=kubernetes_asyncio.client.V1SecretVolumeSource(secret_name='deploy-config') - ), - kubernetes_asyncio.client.V1Volume( - name='gsa-key', - secret=kubernetes_asyncio.client.V1SecretVolumeSource(secret_name=hail_credentials_secret_name), - ), - kubernetes_asyncio.client.V1Volume( - name='user-tokens', - secret=kubernetes_asyncio.client.V1SecretVolumeSource(secret_name=tokens_secret_name), - ), - ] - volume_mounts = [ - kubernetes_asyncio.client.V1VolumeMount(mount_path='/deploy-config', name='deploy-config', read_only=True), - kubernetes_asyncio.client.V1VolumeMount(mount_path='/gsa-key', name='gsa-key', read_only=True), - kubernetes_asyncio.client.V1VolumeMount(mount_path='/user-tokens', name='user-tokens', read_only=True), - ] - resources = kubernetes_asyncio.client.V1ResourceRequirements(requests={'cpu': '1.601', 'memory': '1.601G'}) - else: - workshop = userdata['workshop'] - - service_account_name = None - image = workshop['image'] - env = [] - volumes = [] - volume_mounts = [] - - cpu = workshop['cpu'] - memory = workshop['memory'] - resources = kubernetes_asyncio.client.V1ResourceRequirements( - requests={'cpu': cpu, 'memory': memory}, limits={'cpu': cpu, 'memory': memory} - ) - - pod_spec = kubernetes_asyncio.client.V1PodSpec( - node_selector={'preemptible': 'false'}, - service_account_name=service_account_name, - containers=[ - kubernetes_asyncio.client.V1Container( - command=command, - name='default', - image=image, - env=env, - ports=[kubernetes_asyncio.client.V1ContainerPort(container_port=POD_PORT)], - resources=resources, - volume_mounts=volume_mounts, - ) - ], - volumes=volumes, - ) - - user_id = str(userdata['id']) - pod_template = kubernetes_asyncio.client.V1Pod( - metadata=kubernetes_asyncio.client.V1ObjectMeta( - generate_name='notebook-worker-', labels={'app': 'notebook-worker', 'user_id': user_id} - ), - spec=pod_spec, - ) - pod = await k8s.create_namespaced_pod( - NOTEBOOK_NAMESPACE, pod_template, _request_timeout=KUBERNETES_TIMEOUT_IN_SECONDS - ) - - return pod - - -def notebook_status_from_pod(pod): - pod_ip = pod.status.pod_ip - if not pod_ip: - state = 'Scheduling' - else: - state = 'Initializing' - if pod.status and pod.status.conditions: - for c in pod.status.conditions: - if c.type == 'Ready' and c.status == 'True': - state = 'Initializing' - return {'pod_ip': pod_ip, 'state': state} - - -async def k8s_notebook_status_from_notebook(k8s, notebook): - if not notebook: - return None - - try: - pod = await k8s.read_namespaced_pod( - name=notebook['pod_name'], namespace=NOTEBOOK_NAMESPACE, _request_timeout=KUBERNETES_TIMEOUT_IN_SECONDS - ) - return notebook_status_from_pod(pod) - except kubernetes_asyncio.client.rest.ApiException as e: - if e.status == 404: - log.exception(f"404 for pod: {notebook['pod_name']}") - return None - raise - - -async def notebook_status_from_notebook(client_session: httpx.ClientSession, k8s, service, headers, cookies, notebook): - status = await k8s_notebook_status_from_notebook(k8s, notebook) - if not status: - return None - - if status['state'] == 'Initializing': - if notebook['state'] == 'Ready': - status['state'] = 'Ready' - else: - pod_name = notebook['pod_name'] - - # don't have dev credentials to connect through internal.hail.is - ready_url = deploy_config.external_url( - service, f'/instance/{notebook["notebook_token"]}/?token={notebook["jupyter_token"]}' - ) - try: - async with client_session.get(ready_url, headers=headers, cookies=cookies) as resp: - if resp.status >= 200 and resp.status < 300: - log.info(f'GET on jupyter pod {pod_name} succeeded: {resp}') - status['state'] = 'Ready' - else: - log.info(f'GET on jupyter pod {pod_name} failed: {resp}') - except aiohttp.ServerTimeoutError: - log.exception(f'GET on jupyter pod {pod_name} timed out: {resp}') - - return status - - -async def update_notebook_return_changed(dbpool, user_id, notebook, new_status): - if not new_status: - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('DELETE FROM notebooks WHERE user_id = %s;', user_id) - return True - if new_status['state'] != notebook['state']: - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute( - 'UPDATE notebooks SET state = %s, pod_ip = %s WHERE user_id = %s;', - (new_status['state'], new_status['pod_ip'], user_id), - ) - return True - return False - - -async def get_user_notebook(dbpool, user_id): - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('SELECT * FROM notebooks WHERE user_id = %s;', user_id) - notebooks = await cursor.fetchall() - - if len(notebooks) == 1: - return notebooks[0] - assert len(notebooks) == 0, len(notebooks) - return None - - -async def delete_worker_pod(k8s, pod_name): - try: - await k8s.delete_namespaced_pod(pod_name, NOTEBOOK_NAMESPACE, _request_timeout=KUBERNETES_TIMEOUT_IN_SECONDS) - except kubernetes_asyncio.client.rest.ApiException as e: - log.info(f'pod {pod_name} already deleted {e}') - - -@routes.get('/healthcheck') -async def healthcheck(request): # pylint: disable=unused-argument - return web.Response() - - -@routes.get('') -@routes.get('/') -@auth.web_maybe_authenticated_user -async def index(request, userdata): # pylint: disable=unused-argument - return await render_template('notebook', request, userdata, 'index.html', {}) - - -async def _get_notebook(service, request, userdata): - app = request.app - dbpool = app['dbpool'] - page_context = {'notebook': await get_user_notebook(dbpool, str(userdata['id'])), 'notebook_service': service} - return await render_template(service, request, userdata, 'notebook.html', page_context) - - -async def _post_notebook(service, request, userdata): - app = request.app - dbpool = app['dbpool'] - k8s = app['k8s_client'] - - notebook_token = secrets.token_urlsafe(32) - jupyter_token = secrets.token_hex(16) - - pod = await start_pod(k8s, service, userdata, notebook_token, jupyter_token) - if pod.status.pod_ip: - state = 'Initializing' - else: - state = 'Scheduling' - - user_id = str(userdata['id']) - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute( - ''' -DELETE FROM notebooks WHERE user_id = %s; -INSERT INTO notebooks (user_id, notebook_token, pod_name, state, pod_ip, jupyter_token) VALUES (%s, %s, %s, %s, %s, %s); -''', - (user_id, user_id, notebook_token, pod.metadata.name, state, pod.status.pod_ip, jupyter_token), - ) - - raise web.HTTPFound(location=deploy_config.external_url(service, '/notebook')) - - -async def _delete_notebook(service, request, userdata): - app = request.app - dbpool = app['dbpool'] - k8s = app['k8s_client'] - user_id = str(userdata['id']) - notebook = await get_user_notebook(dbpool, user_id) - if notebook: - await delete_worker_pod(k8s, notebook['pod_name']) - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('DELETE FROM notebooks WHERE user_id = %s;', user_id) - - raise web.HTTPFound(location=deploy_config.external_url(service, '/notebook')) - - -async def _wait_websocket(service, request, userdata): - app = request.app - k8s = app['k8s_client'] - dbpool = app['dbpool'] - client_session: httpx.ClientSession = app['client_session'] - user_id = str(userdata['id']) - notebook = await get_user_notebook(dbpool, user_id) - if not notebook: - return web.HTTPNotFound() - - ws = web.WebSocketResponse() - await ws.prepare(request) - - # forward authorization - headers = {} - if 'Authorization' in request.headers: - headers['Authorization'] = request.headers['Authorization'] - if 'X-Hail-Internal-Authorization' in request.headers: - headers['X-Hail-Internal-Authorization'] = request.headers['X-Hail-Internal-Authorization'] - - cookies = {} - cloud = get_global_config()['cloud'] - for k in (f'{cloud}_session', f'{cloud}_sesh'): - if k in request.cookies: - cookies[k] = request.cookies[k] - - ready = notebook['state'] == 'Ready' - count = 0 - while count < 10: - try: - new_status = await notebook_status_from_notebook(client_session, k8s, service, headers, cookies, notebook) - changed = await update_notebook_return_changed(dbpool, user_id, notebook, new_status) - if changed: - log.info(f"pod {notebook['pod_name']} status changed: {notebook['state']} => {new_status['state']}") - break - except Exception: # pylint: disable=broad-except - log.exception(f"/wait: error while updating status for pod: {notebook['pod_name']}") - await asyncio.sleep(1) - count += 1 - - ready = new_status and new_status['state'] == 'Ready' - - # 0/1 ready - await ws.send_str(str(int(ready))) - - return ws - - -async def _get_error(service, request, userdata): - if not userdata: - raise web.HTTPFound(deploy_config.external_url(service, '/login')) - - app = request.app - k8s = app['k8s_client'] - dbpool = app['dbpool'] - user_id = str(userdata['id']) - - # we just failed a check, so update status from k8s without probe, - # best we can do is 'Initializing' - notebook = await get_user_notebook(dbpool, user_id) - new_status = await k8s_notebook_status_from_notebook(k8s, notebook) - await update_notebook_return_changed(dbpool, user_id, notebook, new_status) - - session = await aiohttp_session.get_session(request) - if notebook: - if new_status['state'] == 'Ready': - raise web.HTTPFound( - deploy_config.external_url( - service, f'/instance/{notebook["notebook_token"]}/?token={notebook["jupyter_token"]}' - ) - ) - set_message( - session, - 'Could not connect to Jupyter instance. Please wait for Jupyter to be ready and try again.', - 'error', - ) - else: - set_message(session, 'Jupyter instance not found. Please launch a new instance.', 'error') - raise web.HTTPFound(deploy_config.external_url(service, '/notebook')) - - -async def _get_auth(request, userdata): - requested_notebook_token = request.match_info['requested_notebook_token'] - app = request.app - dbpool = app['dbpool'] - - notebook = await get_user_notebook(dbpool, str(userdata['id'])) - if notebook and notebook['notebook_token'] == requested_notebook_token: - pod_ip = notebook['pod_ip'] - if pod_ip: - return web.Response(headers={'pod_ip': f'{pod_ip}:{POD_PORT}'}) - - return web.HTTPForbidden() - - -@routes.get('/notebook') -@auth.web_authenticated_users_only() -async def get_notebook(request, userdata): - return await _get_notebook('notebook', request, userdata) - - -@routes.post('/notebook/delete') -@check_csrf_token -@auth.web_authenticated_users_only(redirect=False) -async def delete_notebook(request, userdata): # pylint: disable=unused-argument - return await _delete_notebook('notebook', request, userdata) - - -@routes.post('/notebook') -@check_csrf_token -@auth.web_authenticated_users_only(redirect=False) -async def post_notebook(request, userdata): - return await _post_notebook('notebook', request, userdata) - - -@routes.get('/auth/{requested_notebook_token}') -@auth.web_authenticated_users_only(redirect=False) -async def get_auth(request, userdata): - return await _get_auth(request, userdata) - - -@routes.get('/images') -async def get_images(request): - images = [DEFAULT_WORKER_IMAGE] - - app = request.app - dbpool = app['dbpool'] - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('SELECT image FROM workshops WHERE active = 1;') - workshops = await cursor.fetchall() - for workshop in workshops: - images.append(workshop['image']) - - return web.Response(text=' '.join(images)) - - -@routes.get('/notebook/wait') -@auth.web_authenticated_users_only(redirect=False) -async def wait_websocket(request, userdata): - return await _wait_websocket('notebook', request, userdata) - - -@routes.get('/error') -@auth.web_maybe_authenticated_user -async def get_error(request, userdata): - return await _get_error('notebook', request, userdata) - - -@routes.get('/workshop-admin') -@auth.web_authenticated_developers_only() -async def workshop_admin(request, userdata): - dbpool = request.app['dbpool'] - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('SELECT * FROM workshops') - workshops = await cursor.fetchall() - - page_context = {'workshops': workshops} - return await render_template('notebook', request, userdata, 'workshop-admin.html', page_context) - - -@routes.post('/workshop-admin-create') -@check_csrf_token -@auth.web_authenticated_developers_only() -async def create_workshop(request, userdata): # pylint: disable=unused-argument - dbpool = request.app['dbpool'] - session = await aiohttp_session.get_session(request) - - post = await request.post() - name = post['name'] - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - try: - active = post.get('active') == 'on' - if active: - token = secrets.token_urlsafe(32) - else: - token = None - await cursor.execute( - ''' -INSERT INTO workshops (name, image, cpu, memory, password, active, token) VALUES (%s, %s, %s, %s, %s, %s, %s); -''', - (name, post['image'], post['cpu'], post['memory'], post['password'], active, token), - ) - set_message(session, f'Created workshop {name}.', 'info') - except pymysql.err.IntegrityError as e: - if e.args[0] == 1062: # duplicate error - set_message(session, f'Cannot create workshop {name}: duplicate name.', 'error') - else: - raise - - raise web.HTTPFound(deploy_config.external_url('notebook', '/workshop-admin')) - - -@routes.post('/workshop-admin-update') -@check_csrf_token -@auth.web_authenticated_developers_only() -async def update_workshop(request, userdata): # pylint: disable=unused-argument - app = request.app - dbpool = app['dbpool'] - - post = await request.post() - name = post['name'] - id = post['id'] - session = await aiohttp_session.get_session(request) - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - active = post.get('active') == 'on' - # FIXME don't set token unless re-activating - if active: - token = secrets.token_urlsafe(32) - else: - token = None - n = await cursor.execute( - ''' -UPDATE workshops SET name = %s, image = %s, cpu = %s, memory = %s, password = %s, active = %s, token = %s WHERE id = %s; -''', - (name, post['image'], post['cpu'], post['memory'], post['password'], active, token, id), - ) - if n == 0: - set_message(session, f'Internal error: cannot update workshop: workshop ID {id} not found.', 'error') - else: - set_message(session, f'Updated workshop {name}.', 'info') - - raise web.HTTPFound(deploy_config.external_url('notebook', '/workshop-admin')) - - -@routes.post('/workshop-admin-delete') -@check_csrf_token -@auth.web_authenticated_developers_only() -async def delete_workshop(request, userdata): # pylint: disable=unused-argument - app = request.app - dbpool = app['dbpool'] - - post = await request.post() - name = post['name'] - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - n = await cursor.execute( - ''' -DELETE FROM workshops WHERE name = %s; -''', - name, - ) - - session = await aiohttp_session.get_session(request) - if n == 1: - set_message(session, f'Deleted workshop {name}.', 'info') - else: - set_message(session, f'Workshop {name} not found.', 'error') - - raise web.HTTPFound(deploy_config.external_url('notebook', '/workshop-admin')) - - -workshop_routes = web.RouteTableDef() - - -@workshop_routes.get('') -@workshop_routes.get('/') -@web_maybe_authenticated_workshop_guest -async def workshop_get_index(request, userdata): - page_context = {'notebook_service': 'workshop'} - return await render_template('workshop', request, userdata, 'workshop/index.html', page_context) - - -@workshop_routes.get('/login') -@web_maybe_authenticated_workshop_guest -async def workshop_get_login(request, userdata): - if userdata: - raise web.HTTPFound(location=deploy_config.external_url('workshop', '/notebook')) - - page_context = {'notebook_service': 'workshop'} - return await render_template('workshop', request, userdata, 'workshop/login.html', page_context) - - -@workshop_routes.post('/login') -@check_csrf_token -async def workshop_post_login(request): - session = await aiohttp_session.get_session(request) - dbpool = request.app['dbpool'] - - post = await request.post() - name = post['name'] - password = post['password'] - - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute( - ''' -SELECT * FROM workshops -WHERE name = %s AND password = %s AND active = 1; -''', - (name, password), - ) - workshops = await cursor.fetchall() - - if len(workshops) != 1: - assert len(workshops) == 0 - set_message(session, 'Workshop Inactive!', 'error') - raise web.HTTPFound(location=deploy_config.external_url('workshop', '/login')) - workshop = workshops[0] - - # use hex since K8s labels can't start or end with _ or - - user_id = secrets.token_hex(16) - session['workshop_session'] = {'workshop_name': name, 'workshop_token': workshop['token'], 'id': user_id} - - set_message(session, f'Welcome to the {name} workshop!', 'info') - - raise web.HTTPFound(location=deploy_config.external_url('workshop', '/notebook')) - - -@workshop_routes.post('/logout') -@check_csrf_token -@web_authenticated_workshop_guest_only(redirect=True) -async def workshop_post_logout(request, userdata): - app = request.app - dbpool = app['dbpool'] - k8s = app['k8s_client'] - user_id = str(userdata['id']) - notebook = await get_user_notebook(dbpool, user_id) - if notebook: - # Notebook is inaccessible since login creates a new random - # user id, so delete it. - await delete_worker_pod(k8s, notebook['pod_name']) - async with dbpool.acquire() as conn: - async with conn.cursor() as cursor: - await cursor.execute('DELETE FROM notebooks WHERE user_id = %s;', user_id) - - session = await aiohttp_session.get_session(request) - if 'workshop_session' in session: - del session['workshop_session'] - - raise web.HTTPFound(location=deploy_config.external_url('workshop', '/notebook')) - - -@workshop_routes.get('/resources') -@web_maybe_authenticated_workshop_guest -async def workshop_get_faq(request, userdata): - page_context = {'notebook_service': 'workshop'} - return await render_template('workshop', request, userdata, 'workshop/resources.html', page_context) - - -@workshop_routes.get('/notebook') -@web_authenticated_workshop_guest_only() -async def workshop_get_notebook(request, userdata): - return await _get_notebook('workshop', request, userdata) - - -@workshop_routes.post('/notebook') -@check_csrf_token -@web_authenticated_workshop_guest_only(redirect=False) -async def workshop_post_notebook(request, userdata): - return await _post_notebook('workshop', request, userdata) - - -@workshop_routes.get('/auth/{requested_notebook_token}') -@web_authenticated_workshop_guest_only(redirect=False) -async def workshop_get_auth(request, userdata): - return await _get_auth(request, userdata) - - -@workshop_routes.post('/notebook/delete') -@check_csrf_token -@web_authenticated_workshop_guest_only(redirect=False) -async def workshop_delete_notebook(request, userdata): - return await _delete_notebook('workshop', request, userdata) - - -@workshop_routes.get('/notebook/wait') -@web_authenticated_workshop_guest_only(redirect=False) -async def workshop_wait_websocket(request, userdata): - return await _wait_websocket('workshop', request, userdata) - - -@workshop_routes.get('/error') -@auth.web_maybe_authenticated_user -async def workshop_get_error(request, userdata): - return await _get_error('workshop', request, userdata) - - -async def on_startup(app): - if 'BATCH_USE_KUBE_CONFIG' in os.environ: - await kubernetes_asyncio.config.load_kube_config() - else: - kubernetes_asyncio.config.load_incluster_config() - app['k8s_client'] = kubernetes_asyncio.client.CoreV1Api() - - app['dbpool'] = await create_database_pool() - - app['client_session'] = httpx.client_session() - - -async def on_cleanup(app): - try: - del app['k8s_client'] - finally: - try: - await app['client_session'].close() - finally: - await asyncio.gather(*(t for t in asyncio.all_tasks() if t is not asyncio.current_task())) - - -def init_app(routes): - app = web.Application(middlewares=[monitor_endpoints_middleware]) - app.on_startup.append(on_startup) - app.on_cleanup.append(on_cleanup) - setup_aiohttp_jinja2(app, 'notebook') - setup_aiohttp_session(app) - - root = os.path.dirname(os.path.abspath(__file__)) - routes.static('/static', f'{root}/static') - setup_common_static_routes(routes) - app.add_routes(routes) - app.router.add_get("/metrics", server_stats) - - return app - - -def run(): - sass_compile('notebook') - - notebook_app = init_app(routes) - workshop_app = init_app(workshop_routes) - - root_app = web.Application() - root_app.add_domain('notebook*', deploy_config.prefix_application(notebook_app, 'notebook')) - root_app.add_domain('workshop*', deploy_config.prefix_application(workshop_app, 'workshop')) - root_app.router.add_get("/metrics", server_stats) - web.run_app( - root_app, host='0.0.0.0', port=5000, access_log_class=AccessLogger, ssl_context=internal_server_ssl_context() - ) diff --git a/notebook/notebook/styles/pages/notebook.scss b/notebook/notebook/styles/pages/notebook.scss deleted file mode 100644 index b5af5deaa72..00000000000 --- a/notebook/notebook/styles/pages/notebook.scss +++ /dev/null @@ -1,91 +0,0 @@ -@import "variables.scss"; - -#notebook { - display: flex; - flex-direction: column; - - .nb { - display: flex; - align-items: center; - justify-content: space-around; - min-width: 320px; - } - - .ready-icon { - font-size: $icon-size; - font-weight: bold; - color: green; - margin-right: $margin; - } - - .spinner { - margin-right: $margin; - } - - .nb-state-container { - flex-direction: column; - display: flex; - /* override link styling */ - color: black; - &:active, - &:hover { - text-decoration: none; - - .nb-link { - text-decoration: underline; - } - } - } - - .nb-link { - color: #07c; - text-decoration: none; - } - - .nb-close { - margin-left: $margin * 4; - font-size: $icon-size; - font-weight: bold; - /* clear button styling */ - border: none; - background: none; - } - - .nb-create { - padding: 10px; - } -} - -.spinner { - width: $icon-size; - height: $icon-size; - border-radius: 100%; - background-color: $devil-gray; - -webkit-animation: pulse 1s infinite ease-in-out; - animation: pulse 1s infinite ease-in-out; -} - -@-webkit-keyframes pulse { - 0% { - -webkit-transform: scale(0); - opacity: 1; - } - 100% { - -webkit-transform: scale(1); - transform: scale(1); - opacity: 0; - } -} - -@keyframes pulse { - 0% { - -webkit-transform: scale(0); - transform: scale(0); - opacity: 1; - } - 100% { - -webkit-transform: scale(1); - transform: scale(1); - opacity: 0; - } -} diff --git a/notebook/notebook/templates/index.html b/notebook/notebook/templates/index.html deleted file mode 100644 index b3c446480d4..00000000000 --- a/notebook/notebook/templates/index.html +++ /dev/null @@ -1,14 +0,0 @@ -{% extends "layout.html" %} -{% block title %}Home{% endblock %} -{% block head %} - -{% endblock %} -{% block content %} -
-

- - Hail - -

-
-{% endblock %} diff --git a/notebook/notebook/templates/login.html b/notebook/notebook/templates/login.html deleted file mode 100644 index b3c446480d4..00000000000 --- a/notebook/notebook/templates/login.html +++ /dev/null @@ -1,14 +0,0 @@ -{% extends "layout.html" %} -{% block title %}Home{% endblock %} -{% block head %} - -{% endblock %} -{% block content %} -
-

- - Hail - -

-
-{% endblock %} diff --git a/notebook/notebook/templates/notebook-form.html b/notebook/notebook/templates/notebook-form.html deleted file mode 100644 index 0a057e82ebb..00000000000 --- a/notebook/notebook/templates/notebook-form.html +++ /dev/null @@ -1,4 +0,0 @@ -
- - -
diff --git a/notebook/notebook/templates/notebook-state.html b/notebook/notebook/templates/notebook-state.html deleted file mode 100644 index 50ae058bfd0..00000000000 --- a/notebook/notebook/templates/notebook-state.html +++ /dev/null @@ -1,32 +0,0 @@ - -{% if notebook['state'] != 'Ready' %} - -{% endif %} diff --git a/notebook/notebook/templates/notebook.html b/notebook/notebook/templates/notebook.html deleted file mode 100644 index bcf8563b5d9..00000000000 --- a/notebook/notebook/templates/notebook.html +++ /dev/null @@ -1,14 +0,0 @@ -{% extends "layout.html" %} -{% block title %}Notebook{% endblock %} -{% block head %} - -{% endblock %} -{% block content %} -
- {% if notebook is none %} - {% include 'notebook-form.html' %} - {% else %} - {% include 'notebook-state.html' %} - {% endif %} -
-{% endblock %} diff --git a/notebook/notebook/templates/workshop-admin.html b/notebook/notebook/templates/workshop-admin.html deleted file mode 100644 index 33e8f39535c..00000000000 --- a/notebook/notebook/templates/workshop-admin.html +++ /dev/null @@ -1,79 +0,0 @@ -{% extends "layout.html" %} -{% block title %}Workshops{% endblock %} -{% block content %} -
-

Workshops

- - - - - - - - - - - - - - {% for workshop in workshops %} - - - - - - - - - - - - - - - {% endfor %} - - - - - - - - - - - - - -
NameImageCPUMemoryPasswordActive
{{ workshop['name'] }} - - - - - -
- - - - - - - -
-

Delete Workshop

-
- - - - -
-
-{% endblock %} diff --git a/notebook/notebook/templates/workshop/header.html b/notebook/notebook/templates/workshop/header.html deleted file mode 100644 index da24807f323..00000000000 --- a/notebook/notebook/templates/workshop/header.html +++ /dev/null @@ -1,31 +0,0 @@ - diff --git a/notebook/notebook/templates/workshop/index.html b/notebook/notebook/templates/workshop/index.html deleted file mode 100644 index 853ddc421b9..00000000000 --- a/notebook/notebook/templates/workshop/index.html +++ /dev/null @@ -1,27 +0,0 @@ -{% extends "layout.html" %} -{% block title %}Join Workshop{% endblock %} -{% block content %} -
-

Welcome!

- {% if userdata %} -

Welcome to the Hail-powered {{ userdata['workshop_name'] }} workshop!

- {% else %} -

Welcome to the Hail workshop service!

- {% endif %} -

Navigate to the Notebook - tab to launch Jupyter with workshop materials installed, or - return to an existing Jupyter instance.

- -

Warning: Notebooks are - ephemeral. If you close Jupyter or log out, any work in the - notebook will be lost. To download a notebook, use the File > - Download as menu in Jupyter.

- -

The notebook service is not currently available outside of - workshops like this, but we (the Hail team) are working hard to - change that.

-

- Start|Resources -

-
-{% endblock %} diff --git a/notebook/notebook/templates/workshop/login.html b/notebook/notebook/templates/workshop/login.html deleted file mode 100644 index 56c0357bbc3..00000000000 --- a/notebook/notebook/templates/workshop/login.html +++ /dev/null @@ -1,26 +0,0 @@ -{% extends "layout.html" %} -{% block title %}Join Workshop{% endblock %} -{% block content %} -
-
- - - - - - - - - - - - -
Workshop name:
Password:
- -
- -
-
-{% endblock %} diff --git a/notebook/notebook/templates/workshop/resources.html b/notebook/notebook/templates/workshop/resources.html deleted file mode 100644 index 2b22921e7c4..00000000000 --- a/notebook/notebook/templates/workshop/resources.html +++ /dev/null @@ -1,30 +0,0 @@ -{% extends "layout.html" %} -{% block title %}Resources{% endblock %} -{% block content %} -
-

Workshop Resources

-

Institute for Behavioral Genetics Statistical Genetics Workshop 2021

-

Dates: June 16th, 2021

> -

Hail version: 0.2.69

-

Notebooks, slides, and data

-

BroadE Workshop 2021

-

Dates: April 8th, 2021

> -

Hail version: 0.2.64

-

Notebooks, slides, and data

-

ATGU Workshop 2020: Common and Rare Variant Analysis

-

Dates: July 24th, 2020 and August 12th, 2020

> -

Hail version: 0.2.53

-

Notebooks, slides, and data

-

BroadE Workshop 2020

-

Date: March 5th, 2020

-

Hail version: 0.2.33

-

Slides

-

Notebooks and data

-

ASHG 2019

-

Date: October 16th, 2019

-

Hail version: 0.2.25

-

Slides

-

Notebooks and data

-
- -{% endblock %} diff --git a/notebook/proxy.conf b/notebook/proxy.conf deleted file mode 100644 index 1bae6d6b986..00000000000 --- a/notebook/proxy.conf +++ /dev/null @@ -1,6 +0,0 @@ -proxy_set_header Host $http_host; -proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; -proxy_set_header X-Forwarded-Host $updated_host; -proxy_set_header X-Forwarded-Proto $updated_scheme; -proxy_set_header X-Real-IP $http_x_real_ip; -include /ssl-config/ssl-config-proxy.conf; diff --git a/notebook/scale-test.py b/notebook/scale-test.py deleted file mode 100644 index ebc83735d68..00000000000 --- a/notebook/scale-test.py +++ /dev/null @@ -1,123 +0,0 @@ -import argparse -import asyncio -import logging -import math -import time - -import aiohttp -import numpy as np - -from hailtop.auth import hail_credentials -from hailtop.config import get_deploy_config -from hailtop.hail_logging import configure_logging -from hailtop.httpx import client_session - -configure_logging() -log = logging.getLogger('nb-scale-test') - -deploy_config = get_deploy_config() - - -def get_cookie(session, name): - for cookie in session.cookie_jar: - if cookie.key == name: - return cookie.value - return None - - -async def run(args, i): - async with hail_credentials(authorize_target=False) as credentials: - headers = await credentials.auth_headers() - - async with client_session() as session: - # make sure notebook is up - async with session.get(deploy_config.url('workshop', ''), headers=headers) as resp: - await resp.text() - - log.info(f'{i} loaded notebook home page') - - # log in as workshop guest - # get csrf token - async with session.get(deploy_config.url('workshop', '/login'), headers=headers) as resp: - pass - - data = aiohttp.FormData() - data.add_field(name='name', value=args.workshop) - data.add_field(name='password', value=args.password) - data.add_field(name='_csrf', value=get_cookie(session, '_csrf')) - async with session.post(deploy_config.url('workshop', '/login'), data=data, headers=headers) as resp: - pass - - log.info(f'{i} logged in') - - # create notebook - # get csrf token - async with session.get(deploy_config.url('workshop', '/notebook'), headers=headers) as resp: - pass - - data = aiohttp.FormData() - data.add_field(name='_csrf', value=get_cookie(session, '_csrf')) - async with session.post(deploy_config.url('workshop', '/notebook'), data=data, headers=headers) as resp: - pass - - log.info(f'{i} created notebook') - - start = time.time() - - # wait for notebook ready - ready = False - attempt = 0 - # 5 attempts overkill, should only take 2: Scheduling => Running => Ready - while not ready and attempt < 5: - async with session.ws_connect( - deploy_config.url('workshop', '/notebook/wait', base_scheme='ws'), headers=headers - ) as ws: - async for msg in ws: - if msg.data == '1': - ready = True - attempt += 1 - - end = time.time() - duration = end - start - - log.info(f'{i} notebook state {ready} duration {duration}') - - # delete notebook - # get csrf token - async with session.get(deploy_config.url('workshop', '/notebook'), headers=headers) as resp: - pass - - data = aiohttp.FormData() - data.add_field(name='_csrf', value=get_cookie(session, '_csrf')) - async with session.post(deploy_config.url('workshop', '/notebook/delete'), data=data, headers=headers) as resp: - pass - - log.info(f'{i} notebook delete, done.') - - return duration, ready - - -async def main(): - parser = argparse.ArgumentParser(description='Notebook scale test.') - parser.add_argument('n', type=int, help='number of notebooks to start') - parser.add_argument('workshop', type=str, help='workshop name') - parser.add_argument('password', type=str, help='workshop password') - args = parser.parse_args() - - n = args.n - d = int(math.log10(n)) + 1 - outcomes = await asyncio.gather(*[run(args, str(i).zfill(d)) for i in range(n)]) - - times = [] - for duration, ready in outcomes: - if ready: - times.append(duration) - - print(f'successes: {len(times)} / {n} = {len(times) / n}') - print(f'mean time: {sum(times) / n}') - print(f'quantiles min/50%/95%/99%/max: {np.quantile(times, [0.0, .5, .95, .99, 1.0])}') - print(f'histogram:\n{np.histogram(times, density=True)}') - - -loop = asyncio.get_event_loop() -loop.run_until_complete(main()) diff --git a/notebook/setup.py b/notebook/setup.py deleted file mode 100644 index a012961a486..00000000000 --- a/notebook/setup.py +++ /dev/null @@ -1,12 +0,0 @@ -from setuptools import find_packages, setup - -setup( - name='notebook', - version='0.0.1', - url='https://github.com/hail-is/hail.git', - author='Hail Team', - author_email='hail@broadinstitute.org', - description='Notebook service', - packages=find_packages(), - include_package_data=True, -) diff --git a/notebook/sql/initial.sql b/notebook/sql/initial.sql deleted file mode 100644 index d61f71b0307..00000000000 --- a/notebook/sql/initial.sql +++ /dev/null @@ -1,24 +0,0 @@ -CREATE TABLE `notebooks` ( - `user_id` varchar(255) NOT NULL, - `pod_name` varchar(255) NOT NULL, - `notebook_token` varchar(255) NOT NULL, - # Scheduling, Initializing, Running, Ready - `state` varchar(255) NOT NULL, - `pod_ip` varchar(255), - `creation_date` TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - `jupyter_token` varchar(255) NOT NULL, - PRIMARY KEY (`user_id`) -) ENGINE=InnoDB; - -CREATE TABLE `workshops` ( - `id` INT(11) NOT NULL AUTO_INCREMENT, - `name` varchar(255) NOT NULL, - `image` varchar(255), - `cpu` varchar(255), - `memory` varchar(255), - `password` varchar(255), - `active` tinyint(1) DEFAULT 0, - `token` varchar(255), - PRIMARY KEY (`id`), - UNIQUE KEY `name` (`name`) -) ENGINE=InnoDB; diff --git a/notebook/test-playbook.txt b/notebook/test-playbook.txt deleted file mode 100644 index 29b7211b62c..00000000000 --- a/notebook/test-playbook.txt +++ /dev/null @@ -1,79 +0,0 @@ -1. Basic functionality - - - be logged out - - navigate to notebook.hail.is - - you will be prompted to log in - - launch Jupyter - - connect to Jupyter - - open a notebook - - verify kernel works, evaluate e.g. 1 + 1 - - close notebook (click x) - - verify pod is deleted - - log out - -2. Notebook Error - -A. Deleted pod - - - create a noteobok - - delete the notebook pod - - connect to Jupyter - - it should open a new tab - - it sholud return to notebook.hail.is/notebook with an error and the - launch Jupyter button - -B. Frozen pod - - - launch Jupyter - - log into the k8s node running the pod - - docker pause - - connect to Jupyter - - it should create a second tab - - let nginx gateway time out - - it should return to notebook.hail.is/noebook with an error, the - notebook should be in the Initializing state - - docker unpause - - state should transition to ready - -3. Workshop Admin - - - log in with a developer account - - navigate to Notebook > Workshop Admin - - create new workshop - - create new workshop with name of existing workshop - - update workshop changing all fields - - delete workshop - - delete non-existent workshop - -4. Workshop - -A. Basic functionality - - - navigate to workshop.hail.is - - welcome text should be shown - - log in with an invalid workshop - - log in with an invalid password - - log in with a valid workshop/password that is not active - - each should return you to the login page with an error message - - log in with valid workshop and password - - you should be redirected to workshop.hail.is/notebook with success message and the launch Jupyter button - - log out - - - in addition, 1. and 2. should be repeated in workshop.hail.is - -B. Deactivate workshop - -i. Launch Jupyter - - - log into an active workshop - - deactivate the workshop in another tab - - launch Jupyter - - you should be redirected to workshop/login - -ii. Connect to Jupyter - - - log into an active workshop - - launch Jupyter - - deactivate the workshop in another tab - - connect to Jupyer - - you should be redirected to workshop/login diff --git a/notebook/worker/Dockerfile b/notebook/worker/Dockerfile deleted file mode 100644 index 5d2db78fd2b..00000000000 --- a/notebook/worker/Dockerfile +++ /dev/null @@ -1,24 +0,0 @@ -FROM jupyter/scipy-notebook -MAINTAINER Hail Team - -USER root -RUN apt-get update && apt-get install -y \ - openjdk-8-jre-headless \ - && rm -rf /var/lib/apt/lists/* -USER jovyan - -RUN pip install --upgrade --no-cache-dir \ - 'jupyter-spark<0.5' \ - hail==0.2.4 \ - jupyter_contrib_nbextensions \ - && \ - pip check && \ - jupyter serverextension enable --user --py jupyter_spark && \ - jupyter nbextension install --user --py jupyter_spark && \ - jupyter contrib nbextension install --user && \ - jupyter nbextension enable --user --py jupyter_spark && \ - jupyter nbextension enable --user --py widgetsnbextension && \ - jupyter nbextension enable --user collapsible_headings/main && \ - jupyter nbextension enable --user move_selected_cells/main - -COPY ./resources/ /home/jovyan diff --git a/notebook/worker/resources/Hail-Workshop-Notebook.ipynb b/notebook/worker/resources/Hail-Workshop-Notebook.ipynb deleted file mode 100644 index e03710018d0..00000000000 --- a/notebook/worker/resources/Hail-Workshop-Notebook.ipynb +++ /dev/null @@ -1,1531 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Hail workshop\n", - "\n", - "This notebook will introduce the following concepts:\n", - "\n", - " - Using Jupyter notebooks effectively\n", - " - Loading genetic data into Hail\n", - " - General-purpose data exploration functionality\n", - " - Plotting functionality\n", - " - Quality control of sequencing data\n", - " - Running a Genome-Wide Association Study (GWAS)\n", - " - Rare variant burden tests\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Hail on Jupyter\n", - "\n", - "From https://jupyter.org: \n", - "\n", - "\"The Jupyter Notebook is an open-source web application that allows you to create and share documents that contain live code, equations, visualizations and narrative text. Uses include: data cleaning and transformation, numerical simulation, statistical modeling, data visualization, machine learning, and much more.\"\n", - "\n", - "In the last year, the Jupyter development team [released Jupyter Lab](https://blog.jupyter.org/jupyterlab-is-ready-for-users-5a6f039b8906), an integrated environment for data, code, and visualizations. If you've used R Studio, this is the closest thing that works in Python (and many other languages!)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Why notebooks?\n", - "\n", - "Part of what we think is so exciting about Hail is that it has coincided with a larger shift in the data science community.\n", - "\n", - "Three years ago, most computational biologists at Broad analyzed genetic data using command-line tools, and took advantage of research compute clusters by explicitly using scheduling frameworks like LSF or Sun Grid Engine.\n", - "\n", - "Now, they have the option to use Hail in interactive Python notebooks backed by thousands of cores on public compute clouds like [Google Cloud](https://cloud.google.com/), [Amazon Web Services](https://aws.amazon.com/), or [Microsoft Azure](https://azure.microsoft.com/)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Using Jupyter" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Running cells\n", - "\n", - "Evaluate cells using `SHIFT + ENTER`. Select the next cell and run it." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print('Hello, world')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Modes\n", - "\n", - "Jupyter has two modes, a **navigation mode** and an **editor mode**.\n", - "\n", - "#### Navigation mode:\n", - "\n", - " - BLUE cell borders\n", - " - `UP` / `DOWN` move between cells\n", - " - `ENTER` while a cell is selected will move to **editing mode**.\n", - " - Many letters are keyboard shortcuts! This is a common trap.\n", - " \n", - "#### Editor mode:\n", - "\n", - " - GREEN cell borders\n", - " - `UP` / `DOWN`/ move within cells before moving between cells.\n", - " - `ESC` will return to **navigation mode**.\n", - " - `SHIFT + ENTER` will evaluate a cell and return to **navigation mode**." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Cell types\n", - "\n", - "There are several types of cells in Jupyter notebooks. The two you will see here are **Markdown** (text) and **Code**." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# This is a code cell\n", - "my_variable = 5" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "**This is a markdown cell**, so even if something looks like code (as below), it won't get executed!\n", - "\n", - "my_variable += 1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(my_variable)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common gotcha: a code cell turns into markdown\n", - "\n", - "This can happen if you are in **navigation mode** and hit the keyboard shortcut `m` while selecting a code cell.\n", - "\n", - "You can either navigate to `Cell > Cell Type > Code` through the top menu, or use the keyboard shortcut `y` to turn it back to code." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Tips and tricks\n", - "\n", - "Keyboard shortcuts:\n", - "\n", - " - `SHIFT + ENTER` to evaluate a cell\n", - " - `ESC` to return to navigation mode\n", - " - `y` to turn a markdown cell into code\n", - " - `m` to turn a code cell into markdown\n", - " - `a` to add a new cell **above** the currently selected cell\n", - " - `b` to add a new cell **below** the currently selected cell\n", - " - `d, d` (repeated) to delete the currently selected cell\n", - " - `TAB` to activate code completion\n", - " \n", - "To try this out, create a new cell below this one using `b`, and print `my_variable` by starting with `print(my` and pressing `TAB`!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Common gotcha: the state of your code seems wrong\n", - "\n", - "Jupyter makes it easy to get yourself into trouble by executing cells out-of-order, or multiple times.\n", - "\n", - "For example, if I declare `x`:\n", - "\n", - "```\n", - "x = 5\n", - "```\n", - "\n", - "Then have a cell that reads:\n", - "\n", - "```\n", - "x += 1\n", - "```\n", - "\n", - "And finally:\n", - "\n", - "```\n", - "print(x)\n", - "```\n", - "\n", - "If you execute these cells in order and once, I'll see the notebook print `6`. However, there is **nothing stopping you** from executing the middle cell ten times, printing `16`!\n", - "\n", - "### Solution\n", - "\n", - "If you get yourself into trouble into this way, the solution is to clear the kernel (Python process) and start again from the top.\n", - "\n", - "First, `Kernel > Restart & Clear Output > (accept dialog)`.\n", - "\n", - "Second, `Cell > Run all above`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Set up our Python environment" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In addition to Hail, we import a few methods from the [bokeh](https://bokeh.pydata.org/en/latest/) plotting library. We'll see examples soon!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import hail as hl\n", - "from bokeh.io import output_notebook, show" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Now we initialize Hail and set up Bokeh to display inline in the notebook." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hl.init()\n", - "output_notebook()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Download public 1000 Genomes data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The workshop materials are designed to work on a small (~20MB) downsampled chunk of the public 1000 Genomes dataset.\n", - "\n", - "You can run these same functions on your computer or on the cloud!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hl.utils.get_1kg('data/')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is possible to call command-line utilities from Jupyter by prefixing a line with a `!`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! ls -1 data/" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 1: Explore genetic data with Hail" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import data from VCF\n", - "\n", - "The [Variant Call Format (VCF)](https://en.wikipedia.org/wiki/Variant_Call_Format) is a common file format for representing genetic data collected on multiple individuals (samples).\n", - "\n", - "Hail's [import_vcf](https://hail.is/docs/0.2/methods/impex.html#hail.methods.import_vcf) function can read this format.\n", - "\n", - "However, VCF is a text format that is easy for humans but very bad for computers. The first thing we do is `write` to a Hail native file format, which is much faster!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hl.import_vcf('data/1kg.vcf.bgz').write('data/1kg.mt', overwrite=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Read 1KG into Hail\n", - "\n", - "We represent genetic data as a Hail [MatrixTable](https://hail.is/docs/0.2/overview/matrix_table.html), and name our variable `mt` to indicate this." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = hl.read_matrix_table('data/1kg.mt')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### What is a `MatrixTable`?\n", - "\n", - "Let's describe it!\n", - "\n", - "The `describe` method prints the **schema**, that is, the fields in the dataset and their types.\n", - "\n", - "You can see:\n", - " - **numeric** types:\n", - " - integers (`int32`, `int64`), e.g. `5`\n", - " - floating point numbers (`float32`, `float64`), e.g. `5.5` or `3e-8`\n", - " - **strings** (`str`), e.g. `\"Foo\"`\n", - " - **boolean** values (`bool`) e.g. `True`\n", - " - **collections**:\n", - " - arrays (`array`), e.g. `[1,1,2,3]`\n", - " - sets (`set`), e.g. `{1,3}`\n", - " - dictionaries (`dict`), e.g. `{'Foo': 5, 'Bar': 10}`\n", - " - **genetic data types**:\n", - " - loci (`locus`), e.g. `[GRCh37] 1:10000` or `[GRCh38] chr1:10024`\n", - " - genotype calls (`call`), e.g. `0/2` or `1|0`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `count`\n", - "\n", - "`MatrixTable.count` returns a tuple with the number of rows (variants) and number of columns (samples)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.count()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### `show`\n", - "\n", - "There is no `mt.show()` method, but you can show individual fields like the sample ID, `s`, or the locus." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.s.show(5)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.locus.show(5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exercise: show other fields\n", - "\n", - "You can see the names of fields above. `show()` the first few values for a few of them, making sure to include at least one **row field** and **at least one entry field**. Capitalization is important.\n", - "\n", - "To print fields inside the `info` structure, you must add another dot, e.g. `mt.info.AN`.\n", - "\n", - "What do you notice being printed alongside some of the fields?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Hail has functions built for genetics\n", - "\n", - "For example, `hl.summarize_variants` prints useful statistics about the genetic variants in the dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hl.summarize_variants(mt)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Most of Hail's functionality is totally general-purpose!\n", - "\n", - "Functions like `summarize_variants` are built out of Hail's general-purpose data manipulation functionality. We can use Hail to ask arbitrary questions about the data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "mt.aggregate_rows(hl.agg.count_where(mt.alleles == ['A', 'T']))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Or if we had flight data:\n", - "\n", - "```\n", - "flight_data.aggregate(\n", - " hl.agg.count_where(flight_data.departure_city == 'Boston')\n", - ")\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The `counter` aggregator makes it possible to see distributions of categorical data, like alleles:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "snp_counts = mt.aggregate_rows(\n", - " hl.array(hl.agg.counter(mt.alleles)))\n", - "snp_counts" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "By sorting the result in Python, we can recover an interesting bit of biology..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sorted(snp_counts,\n", - " key=lambda x: x[1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Question: What is interesting about this distribution?\n", - "\n", - "### Question: Why do the counts come in pairs?\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## A closer look at GQ\n", - "\n", - "The GQ field in our dataset is an interesting one to explore further, and we can use various pieces of Hail's functionality to do so.\n", - "\n", - "**GQ** stands for **Genotype Quality**, and reflects confidence in a genotype call. It is a non-negative **integer** truncated at 99, and is the **phred-scaled** probability of the second-most-likely genotype call.\n", - "\n", - "Phred-scaling a value is the following transformation:\n", - "\n", - "$\\quad Phred(x) = -10 * log_{10}(x)$\n", - "\n", - "#### Example:\n", - "\n", - "\n", - "$\\quad p_{0/0} = 0.9899$\n", - "\n", - "$\\quad p_{0/1} = 0.01$\n", - "\n", - "$\\quad p_{1/1} = 0.001$\n", - "\n", - "In this case,\n", - "\n", - "$\\quad GQ = -10 * log_{10} (0.01) = 20$\n", - "\n", - "\n", - "Higher GQ values indicate higher confidence. $GQ=10$ is 90% confidence, $GQ=20$ is 99% confidence, $GQ=30$ is 99.9% confidence, and so on." - ] - }, - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "### GQ in our dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "mt.aggregate_entries(hl.agg.stats(mt.GQ))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using our equation above, the mean value indicates about 99.9% confidence. But it's not generally a good idea to draw conclusions just based on a mean and standard deviation...\n", - "\n", - "It is possible to build more complicated queries using small pieces. We can use `hl.agg.filter` to compute conditional statistics:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.aggregate_entries(hl.agg.filter(mt.GT.is_het(),\n", - " hl.agg.stats(mt.GQ)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To look at GQ at genotypes that are **not heterozygous**, we need add only one character (`~`):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "mt.aggregate_entries(hl.agg.filter(~mt.GT.is_het(), \n", - " hl.agg.stats(mt.GQ)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There are often many ways to accomplish something in Hail. We could have done these both together (and more efficiently!) using `hl.agg.group_by`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.aggregate_entries(hl.agg.group_by(mt.GT, \n", - " hl.agg.stats(mt.GQ)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Of course, the best way to understand a distribution is to **look** at it!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.histogram(\n", - " mt.GQ, \n", - " bins=100)\n", - "\n", - "show(p)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exercise: What's going on here? Investigate!\n", - "\n", - "*Hint: try copying some of the cells above and looking at DP, the sequencing depth, as well as GQ. The ratio between the two may also be interesting...*\n", - "\n", - "*Hint: if you want to plot a filtered GQ distribution, you can use something like:*\n", - "\n", - " p = hl.plot.histogram(mt.filter_entries(mt.GT.is_het()).GQ, bins=100)\n", - "\n", - "Remember that you can create a new cell using keyboard shortcuts `A` or `B` in **navigation mode**.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 2: Annotation and quality control" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Integrate sample information\n", - "\n", - "We're building toward a genome-wide association test in part 3, but we don't just need genetic data to do a GWAS -- we also need phenotype data! Luckily, our `hl.utils.get_1kg` function also downloaded some simulated phenotype data.\n", - "\n", - "This is a text file:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! head data/1kg_annotations.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can import it as a [Hail Table](https://hail.is/docs/0.2/overview/table.html) with [hl.import_table](https://hail.is/docs/0.2/methods/impex.html?highlight=import_table#hail.methods.import_table).\n", - "\n", - "We call it \"sa\" for \"sample annotations\"." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sa = hl.import_table('data/1kg_annotations.txt', \n", - " impute=True, \n", - " key='Sample')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "While we can see the names and types of fields in the logging messages, we can also `describe` and `show` this table:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sa.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sa.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Add sample metadata into our 1KG `MatrixTable`\n", - "\n", - "It's short and easy:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.annotate_cols(pheno = sa[mt.s])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### What's going on here?\n", - "\n", - "Understanding what's going on here is a bit more difficult. To understand, we need to understand a few pieces:\n", - "\n", - "#### 1. `annotate` methods\n", - "\n", - "In Hail, `annotate` methods refer to **adding new fields**. \n", - "\n", - " - `MatrixTable`'s `annotate_cols` adds new column fields.\n", - " - `MatrixTable`'s `annotate_rows` adds new row fields.\n", - " - `MatrixTable`'s `annotate_entries` adds new entry fields.\n", - " - `Table`'s `annotate` adds new row fields.\n", - "\n", - "In the above cell, we are adding a new coluimn field called \"pheno\". This field should be the values in our table `sa` associated with the sample ID `s` in our `MatrixTable` - that is, this is performing a **join**.\n", - "\n", - "Python uses square brackets to look up values in dictionaries:\n", - "\n", - " d = {'foo': 5, 'bar': 10}\n", - " d['foo']\n", - "\n", - "You should think of this in much the same way - for each column of `mt`, we are looking up the fields in `sa` using the sample ID `s`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exercise: Query some of these column fields using `mt.aggregate_cols`.\n", - "\n", - "Some of the aggregators we used earlier:\n", - " - `hl.agg.counter`\n", - " - `hl.agg.stats`\n", - " - `hl.agg.count_where`\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Sample QC\n", - "\n", - "We'll start with examples of sample QC.\n", - "\n", - "Hail has the function [hl.sample_qc](https://hail.is/docs/0.2/methods/genetics.html#hail.methods.sample_qc) to compute a list of useful statistics about samples from sequencing data.\n", - "\n", - "**Click the link** above to see the documentation, which lists the fields and their descriptions." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = hl.sample_qc(mt)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.sample_qc.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.scatter(x=mt.sample_qc.r_het_hom_var,\n", - " y=mt.sample_qc.call_rate)\n", - "show(p)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exercise: Plot some other fields!\n", - "\n", - "Modify the cell above. Remember `hl.plot.histogram` as well!\n", - "\n", - "If you want to start getting fancy, you can plot more complicated expressions -- the ratio between two fields, for instance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Filter columns using generated QC statistics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.filter_cols(mt.sample_qc.dp_stats.mean >= 4)\n", - "mt = mt.filter_cols(mt.sample_qc.call_rate >= 0.97)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Entry QC\n", - "\n", - "We explored GQ above, and analysts often set thresholds for GQ to filter entries (genotypes). Another useful metric is **allele read balance**.\n", - "\n", - "This value is defined by:\n", - "\n", - "$\\quad AB = \\dfrac{N_{alt}}{{N_{ref} + N_{alt}}}$\n", - "\n", - "Where $N_{ref}$ is the number of reference reads and $N_{alt}$ is the number of alternate reads.\n", - "\n", - "We want " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# call rate before filtering\n", - "mt.aggregate_entries(hl.agg.fraction(hl.is_defined(mt.GT)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ab = mt.AD[1] / hl.sum(mt.AD)\n", - "\n", - "filter_condition_ab = (\n", - " hl.case()\n", - " .when(mt.GT.is_hom_ref(), ab <= 0.1)\n", - " .when(mt.GT.is_het(), (ab >= 0.25) & (ab <= 0.75))\n", - " .default(ab >= 0.9) # hom-var\n", - ")\n", - "\n", - "mt = mt.filter_entries(filter_condition_ab)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# call rate after filtering\n", - "mt.aggregate_entries(hl.agg.fraction(hl.is_defined(mt.GT)))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Variant QC\n", - "\n", - "Hail has the function [hl.variant_qc](https://hail.is/docs/0.2/methods/genetics.html#hail.methods.variant_qc) to compute a list of useful statistics about **variants** from sequencing data.\n", - "\n", - "Once again, **Click the link** above to see the documentation!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = hl.variant_qc(mt)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "mt.variant_qc.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.variant_qc.AF.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Remove rare sites:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.filter_rows(hl.min(mt.variant_qc.AF) > 1e-6)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Remove sites far from [Hardy-Weinberg equilbrium](https://en.wikipedia.org/wiki/Hardy%E2%80%93Weinberg_principle):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.filter_rows(mt.variant_qc.p_value_hwe > 0.005)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# final variant and sample count\n", - "mt.count()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 3: GWAS!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "A GWAS is an independent association test performed per variant of a genetic dataset. We use the same phenotype and covariates, but test the genotypes for each variant separately. \n", - "\n", - "In Hail, the method we use is [hl.linear_regression_rows](https://hail.is/docs/0.2/methods/stats.html#hail.methods.linear_regression_rows).\n", - "\n", - "We use the phenotype `CaffeineConsumption` as our dependent variable, the number of alternate alleles as our independent variable, and no covariates besides an intercept term (that's the `1.0`)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gwas = hl.linear_regression_rows(y=mt.pheno.CaffeineConsumption, \n", - " x=mt.GT.n_alt_alleles(), \n", - " covariates=[1.0])\n", - "gwas.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Two of the plots that analysts generally produce are a [Manhattan plot](https://en.wikipedia.org/wiki/Manhattan_plot) and a [Q-Q plot](https://en.wikipedia.org/wiki/Q%E2%80%93Q_plot).\n", - "\n", - "We'll start with the Manhattan plot:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.manhattan(gwas.p_value)\n", - "show(p)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.qq(gwas.p_value)\n", - "show(p)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Confounded!\n", - "\n", - "The Q-Q plot indicates **extreme** inflation of p-values.\n", - "\n", - "If you've done a GWAS before, you've probably included a few other covariates -- age, sex, and principal components.\n", - "\n", - "Principal components are a measure of genetic ancestry, and can be used to control for [population stratification](https://en.wikipedia.org/wiki/Population_stratification).\n", - "\n", - "We can compute principal components with Hail:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_eigenvalues, pca_scores, pca_loadings = hl.hwe_normalized_pca(mt.GT, compute_loadings=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The **eigenvalues** reflect the amount of variance explained by each principal component:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_eigenvalues" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The **scores** are the principal components themselves, computed per sample." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_scores.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_scores.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The **loadings** are the contributions to each component for each variant." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pca_loadings.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can **annotate** the principal components back onto `mt`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.annotate_cols(pca = pca_scores[mt.s])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Principal components measure ancestry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "p = hl.plot.scatter(mt.pca.scores[0], \n", - " mt.pca.scores[1],\n", - " label=mt.pheno.SuperPopulation)\n", - "show(p)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Question: Does your plot match your neighbors'?\n", - "\n", - "If not, how is it different?\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Control confounders and run another GWAS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gwas = hl.linear_regression_rows(\n", - " y=mt.pheno.CaffeineConsumption, \n", - " x=mt.GT.n_alt_alleles(),\n", - " covariates=[1.0, mt.pheno.isFemale, mt.pca.scores[0], mt.pca.scores[1], mt.pca.scores[2]])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.qq(gwas.p_value)\n", - "show(p)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "p = hl.plot.manhattan(gwas.p_value)\n", - "show(p)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Part 4: Burden tests" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "GWAS is a great tool for finding associations between **common variants** and disease, but a GWAS can't hope to find associations between rare variants and disease. Even if we have sequencing data for 1,000,000 people, we won't have the statistical power to link a mutation found in only a few people to any disease.\n", - "\n", - "But rare variation has lots of information - especially because statistical genetic theory dictates that rarer variants have, on average, stronger effects on disease per allele.\n", - "\n", - "One possible strategy is to **group together rare variants with similar predicted consequence**. For example, we can group all variants that are predicted to knock out the function of each gene and test the variants for each gene as a group.\n", - "\n", - "We will be running a burden test on our common variant dataset to demonstrate the technical side, but we shouldn't hope to find anything here -- especially because we've only got 10,000 variants!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Import gene data\n", - "\n", - "We start by importing data about genes.\n", - "\n", - "First, we need to download it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "! wget https://storage.googleapis.com/hail-tutorial/ensembl_gene_annotations.txt -O data/ensembl_gene_annotations.txt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gene_ht = hl.import_table('data/ensembl_gene_annotations.txt', impute=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gene_ht.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gene_ht.count()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create an interval key" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gene_ht = gene_ht.transmute(interval = hl.locus_interval(gene_ht['Chromosome'],\n", - " gene_ht['Gene start'],\n", - " gene_ht['Gene end']))\n", - "gene_ht = gene_ht.key_by('interval')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Annotate variants using these intervals" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt = mt.annotate_rows(gene_info = gene_ht[mt.locus])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt.gene_info.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Aggregate genotypes per gene\n", - "\n", - "There is no `hl.burden_test` function -- instead, a burden test is the composition of two modular pieces of Hail functionality:\n", - "\n", - " - `group_rows_by / aggregate`\n", - " - `hl.linear_regression_rows`.\n", - " \n", - "While this might be a few more lines of code to write than `hl.burden_test`, it means that you can flexibly specify the genotype aggregation however you like. Using other tools , you may have a few ways to aggregate, but if you want to do something different you are out of luck!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "burden_mt = (\n", - " mt\n", - " .group_rows_by(gene = mt.gene_info['Gene name'])\n", - " .aggregate(n_variants = hl.agg.count_where(mt.GT.n_alt_alleles() > 0))\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "burden_mt.describe()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### What is `burden_mt`?\n", - "\n", - "It is a **gene-by-sample** matrix (compare to `mt`, a **variant-by-sample** matrix).\n", - "\n", - "It has one row field, the `gene`.\n", - "\n", - "It has one entry field, `n_variants`.\n", - "\n", - "It has all the column fields from `mt`." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Run linear regression per gene\n", - "\n", - "This should look familiar!" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "burden_results = hl.linear_regression_rows(\n", - " y=burden_mt.pheno.CaffeineConsumption, \n", - " x=burden_mt.n_variants,\n", - " covariates=[1.0, \n", - " burden_mt.pheno.isFemale, \n", - " burden_mt.pca.scores[0], \n", - " burden_mt.pca.scores[1], \n", - " burden_mt.pca.scores[2]])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Sorry, no `hl.plot.manhattan` for genes!\n", - "\n", - "Instead, we can sort by p-value and print:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "burden_results.order_by(burden_results.p_value).show()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Exercise: Where along the genome can we find the top gene?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Statistics question: What is the significance threshold for a burden test?\n", - "\n", - "Is this top gene genome-wide significant?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - }, - "toc": { - "base_numbering": 1, - "nav_menu": { - "height": "242px", - "width": "283px" - }, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 1 -} diff --git a/pyproject.toml b/pyproject.toml index e1005150311..4fa299f1a71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,6 @@ ignore = [ "hail/python/hail", "hail/python/test/hail", "hail/python/hailtop/batch/docs", - "notebook", "query", "datasets", "benchmark", diff --git a/tls/config.yaml b/tls/config.yaml index 7bf409de936..b2cbffca020 100644 --- a/tls/config.yaml +++ b/tls/config.yaml @@ -27,16 +27,6 @@ principals: domains: - ci kind: json -- name: notebook-python - domains: - - notebook - - workshop - kind: json -- name: notebook - domains: - - notebook - - workshop - kind: nginx - name: gateway domains: - gateway diff --git a/web_common/web_common/styles/main.scss b/web_common/web_common/styles/main.scss index 6094c726312..e21f87b91ee 100644 --- a/web_common/web_common/styles/main.scss +++ b/web_common/web_common/styles/main.scss @@ -26,7 +26,7 @@ h1, h2, h3, h4, h5, h6 { input { /* Not sure why, input elements in tables sometimes don't display - border without this, see workshop.hail.is/login page. */ + border without this. */ padding: 1px; } diff --git a/web_common/web_common/templates/header.html b/web_common/web_common/templates/header.html index 4237ebf4ab6..690b67743b3 100644 --- a/web_common/web_common/templates/header.html +++ b/web_common/web_common/templates/header.html @@ -53,21 +53,6 @@ {% endif %} - {% if userdata['is_developer'] == 1 %} -
- -
- {% else %} - Notebook - {% endif %} - {% if userdata['is_developer'] == 1 %}
diff --git a/web_common/web_common/templates/layout.html b/web_common/web_common/templates/layout.html index 7931b3d7c86..24dd61864f4 100644 --- a/web_common/web_common/templates/layout.html +++ b/web_common/web_common/templates/layout.html @@ -13,11 +13,7 @@
- {% if notebook_service is defined and notebook_service == 'workshop' %} - {% include 'workshop/header.html' %} - {% else %} {% include 'header.html' %} - {% endif %}
{% if message is defined %}
diff --git a/web_common/web_common/web_common.py b/web_common/web_common/web_common.py index c5da5c98242..9d3ea44337e 100644 --- a/web_common/web_common/web_common.py +++ b/web_common/web_common/web_common.py @@ -59,8 +59,6 @@ def base_context(session, userdata, service): 'base_path': deploy_config.base_path(service), 'base_url': deploy_config.external_url(service, ''), 'www_base_url': deploy_config.external_url('www', ''), - 'notebook_base_url': deploy_config.external_url('notebook', ''), - 'workshop_base_url': deploy_config.external_url('workshop', ''), 'auth_base_url': deploy_config.external_url('auth', ''), 'batch_base_url': deploy_config.external_url('batch', ''), 'batch_driver_base_url': deploy_config.external_url('batch-driver', ''), diff --git a/website/website/templates/nav-top.html b/website/website/templates/nav-top.html index 233d55e50dd..b235a9e3cec 100644 --- a/website/website/templates/nav-top.html +++ b/website/website/templates/nav-top.html @@ -28,9 +28,6 @@ -