From 84fe71537bb248d7dd12141b7a2283dada1a81de Mon Sep 17 00:00:00 2001 From: Jayoung Kim Ryu Date: Mon, 10 Jun 2024 13:12:41 -0400 Subject: [PATCH] Created using Colab --- docs/ReporterScreen_api.ipynb | 3894 +++++++++++++++++++++++---------- 1 file changed, 2754 insertions(+), 1140 deletions(-) mode change 100755 => 100644 docs/ReporterScreen_api.ipynb diff --git a/docs/ReporterScreen_api.ipynb b/docs/ReporterScreen_api.ipynb old mode 100755 new mode 100644 index bda7bd5..aeef10f --- a/docs/ReporterScreen_api.ipynb +++ b/docs/ReporterScreen_api.ipynb @@ -1,228 +1,2205 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, { "cell_type": "markdown", "metadata": { "id": "OhCQ5Qon-b8L" }, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tsbLl_yB9lcC_lo4sjwEoI0r1KAjPkoB)" + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1tsbLl_yB9lcC_lo4sjwEoI0r1KAjPkoB)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qETZ0KgbbJIe" + }, + "source": [ + "# CRISPR Reporter Screen analysis with BEAN" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GQv2gC9IbOak" + }, + "source": [ + "In this tutorial, we will cover\n", + "* Obtaining target variant editing rate from allele count information\n", + "* ReporterScreen object slicing (indexing)\n", + "* Allele filtering based on significance\n", + "\n", + "For the basic API of ReporterScreen object, see [**perturb-tools tutorial of Screen API**](https://github.com/pinellolab/perturb-tools/blob/main/perturb_tools/screen_demo.ipynb)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "b_HqduZRy3DY" + }, + "outputs": [], + "source": [ + "! 
pip install -q crispr-bean" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "q4U4twaAzLPs" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import bean as be\n", + "import logging\n", + "\n", + "logging.getLogger('matplotlib.font_manager').disabled = True" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "8KhlwSn_2x9P", + "outputId": "d5392c81-6fa2-4071-fa93-b45c94e7fd45" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2024-06-10 17:08:18-- https://github.com/pinellolab/crispr-bean/raw/main/tests/data/var_mini_screen.h5ad\n", + "Resolving github.com (github.com)... 140.82.116.4\n", + "Connecting to github.com (github.com)|140.82.116.4|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://raw.githubusercontent.com/pinellolab/crispr-bean/main/tests/data/var_mini_screen.h5ad [following]\n", + "--2024-06-10 17:08:18-- https://raw.githubusercontent.com/pinellolab/crispr-bean/main/tests/data/var_mini_screen.h5ad\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 1452304 (1.4M) [application/octet-stream]\n", + "Saving to: ‘var_mini_screen.h5ad.1’\n", + "\n", + "var_mini_screen.h5a 100%[===================>] 1.38M --.-KB/s in 0.04s \n", + "\n", + "2024-06-10 17:08:19 (32.9 MB/s) - ‘var_mini_screen.h5ad.1’ saved [1452304/1452304]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://github.com/pinellolab/crispr-bean/raw/main/tests/data/var_mini_screen.h5ad" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "yBRxkKbLbD4F" + }, + "outputs": [], + "source": [ + "bdata = be.read_h5ad(\"var_mini_screen.h5ad\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P19G6BY2CODb" + }, + "source": [ + "## Data Structure" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lNOwgB5LCODb" + }, + "source": [ + "The ReporterScreen object is a shallow wrapper around [AnnData](https://anndata.readthedocs.io/en/latest/). More comprehensive data-wrangling guidance can be found in the AnnData documentation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-mRAscHoCODb", + "outputId": "a2532377-9d2e-4cb2-f4bd-0675b639137a" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Genome Editing Screen comprised of n_guides x n_conditions = 30 x 10\n", + " guides: 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'target_group', 'sequence', 'reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'chrom', 'genomic_pos'\n", + " samples: 'condition', 'replicate', 'lower_quantile', 'upper_quantile'\n", + " samples_m: \n", + " samples_p: \n", + " layers: 'X_bcmatch', 'edits'\n", + " uns: 'allele_counts', 'edit_counts', 'target_base_changes', 'tiling'" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "bdata" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pxt9EHSeCODc" + }, + "source": [ + "* `ReporterScreen.X`: guide count\n", + "* `ReporterScreen.guides`: guide metadata. Reference to `AnnData.obs`\n", + "* `ReporterScreen.samples`: sample/condition metadata. Reference to `AnnData.var`.\n", + "* `ReporterScreen.layers[\"X_bcmatch\"]`: barcode-matched guide counts\n", + "* `ReporterScreen.layers[\"edits\"]`: edit counts\n", + "* `ReporterScreen.uns[\"allele_counts\"]`: allele counts per guide and condition\n", + "* `ReporterScreen.uns[\"edit_counts\"]`: edit counts per guide and condition" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4d8WpxcuCODc" + }, + "source": [ + "`.guides` attribute contains the information about each guide." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 356 + }, + "id": "fQBYVENBCODd", + "outputId": "c7cbbda1-c71a-418f-ce5e-20b0a7461617" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 Target gene/variant Target descriptor \\\n", + "name \n", + "CONTROL_8_g1 90 CONTROL NaN \n", + "CONTROL_8_g2 91 CONTROL NaN \n", + "CONTROL_8_g3 92 CONTROL NaN \n", + "CONTROL_8_g4 93 CONTROL NaN \n", + "CONTROL_8_g5 94 CONTROL NaN \n", + "\n", + " Arbitrary number gRNA position category \\\n", + "name \n", + "CONTROL_8_g1 8 g1 \n", + "CONTROL_8_g2 8 g2 \n", + "CONTROL_8_g3 8 g3 \n", + "CONTROL_8_g4 8 g4 \n", + "CONTROL_8_g5 8 g5 \n", + "\n", + " Target base position in gRNA Target base position in reporter \\\n", + "name \n", + "CONTROL_8_g1 4 10 \n", + "CONTROL_8_g2 5 11 \n", + "CONTROL_8_g3 5 12 \n", + "CONTROL_8_g4 7 13 \n", + "CONTROL_8_g5 8 14 \n", + "\n", + " BE target_group sequence ... barcode 5-nt PAM \\\n", + "name ... \n", + "CONTROL_8_g1 ABE NegCtrl AAAATTATCGGAAACGGTAG ... GAAC AATCT \n", + "CONTROL_8_g2 ABE NegCtrl AAAAATTATCGGAAACGGTA ... CGTG GAATC \n", + "CONTROL_8_g3 ABE NegCtrl AAAAATTATCGGAAACGGT ... ATCA AGAAT \n", + "CONTROL_8_g4 ABE NegCtrl CGAAAAATTATCGGAAACGG ... CAAG TAGAA \n", + "CONTROL_8_g5 ABE NegCtrl TCGAAAAATTATCGGAAACG ... 
TTCA GTAGA \n", + "\n", + " offset target target_pos Group2 masked_sequence \\\n", + "name \n", + "CONTROL_8_g1 -10 CONTROL_8 9 NegCtrl GGGGTTGTCGGGGGCGGTGG \n", + "CONTROL_8_g2 -11 CONTROL_8 10 NegCtrl GGGGGTTGTCGGGGGCGGTG \n", + "CONTROL_8_g3 -12 CONTROL_8 11 NegCtrl GGGGGTTGTCGGGGGCGGT \n", + "CONTROL_8_g4 -13 CONTROL_8 12 NegCtrl CGGGGGGTTGTCGGGGGCGG \n", + "CONTROL_8_g5 -14 CONTROL_8 13 NegCtrl TCGGGGGGTTGTCGGGGGCG \n", + "\n", + " masked_barcode chrom genomic_pos \n", + "name \n", + "CONTROL_8_g1 GGGC NaN NaN \n", + "CONTROL_8_g2 CGTG NaN NaN \n", + "CONTROL_8_g3 GTCG NaN NaN \n", + "CONTROL_8_g4 CGGG NaN NaN \n", + "CONTROL_8_g5 TTCG NaN NaN \n", + "\n", + "[5 rows x 21 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0Target gene/variantTarget descriptorArbitrary numbergRNA position categoryTarget base position in gRNATarget base position in reporterBEtarget_groupsequence...barcode5-nt PAMoffsettargettarget_posGroup2masked_sequencemasked_barcodechromgenomic_pos
name
CONTROL_8_g190CONTROLNaN8g1410ABENegCtrlAAAATTATCGGAAACGGTAG...GAACAATCT-10CONTROL_89NegCtrlGGGGTTGTCGGGGGCGGTGGGGGCNaNNaN
CONTROL_8_g291CONTROLNaN8g2511ABENegCtrlAAAAATTATCGGAAACGGTA...CGTGGAATC-11CONTROL_810NegCtrlGGGGGTTGTCGGGGGCGGTGCGTGNaNNaN
CONTROL_8_g392CONTROLNaN8g3512ABENegCtrlAAAAATTATCGGAAACGGT...ATCAAGAAT-12CONTROL_811NegCtrlGGGGGTTGTCGGGGGCGGTGTCGNaNNaN
CONTROL_8_g493CONTROLNaN8g4713ABENegCtrlCGAAAAATTATCGGAAACGG...CAAGTAGAA-13CONTROL_812NegCtrlCGGGGGGTTGTCGGGGGCGGCGGGNaNNaN
CONTROL_8_g594CONTROLNaN8g5814ABENegCtrlTCGAAAAATTATCGGAAACG...TTCAGTAGA-14CONTROL_813NegCtrlTCGGGGGGTTGTCGGGGGCGTTCGNaNNaN
\n", + "

5 rows × 21 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe" + } + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "bdata.guides.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4p8Gc6iXCODd" + }, + "source": [ + "The `.samples` attribute contains sample- and condition-specific information." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "id": "8xFR2-QLCODd", + "outputId": "1e3da2ba-65b1-489e-ac42-4975e8f96a1e" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " condition replicate lower_quantile upper_quantile\n", + "rep5_top top rep5 0.8 1.0\n", + "rep5_high high rep5 0.6 0.8\n", + "rep5_bulk bulk rep5 0.0 1.0\n", + "rep5_low low rep5 0.2 0.4\n", + "rep5_bot bot rep5 0.0 0.2\n", + "rep6_top top rep6 0.8 1.0\n", + "rep6_high high rep6 0.6 0.8\n", + "rep6_bulk bulk rep6 0.0 1.0\n", + "rep6_low low rep6 0.2 0.4\n", + "rep6_bot bot rep6 0.0 0.2" + ], + "text/html": [ + "\n", + " 
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
conditionreplicatelower_quantileupper_quantile
rep5_toptoprep50.81.0
rep5_highhighrep50.60.8
rep5_bulkbulkrep50.01.0
rep5_lowlowrep50.20.4
rep5_botbotrep50.00.2
rep6_toptoprep60.81.0
rep6_highhighrep60.60.8
rep6_bulkbulkrep60.01.0
rep6_lowlowrep60.20.4
rep6_botbotrep60.00.2
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"bdata\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"condition\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"high\",\n \"bot\",\n \"bulk\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"replicate\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"rep6\",\n \"rep5\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"lower_quantile\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.34253953543107013,\n \"min\": 0.0,\n \"max\": 0.8,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.6,\n 0.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"upper_quantile\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.34253953543107013,\n \"min\": 0.2,\n \"max\": 1.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 0.8,\n 0.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "bdata.samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kel9p4x1CODe" + }, + "source": [ + "Per-guide allele count information is stored in `.uns['allele_counts']`." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 444 + }, + "id": "4Q6jF9DgbD4K", + "outputId": "38a1f3d1-4898-44e6-97cc-f6406c34a837" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " guide allele \\\n", + "0 ACAT2_SA_45_g4 1:12:+:A>T \n", + "1 ACAT2_SA_45_g4 12:23:+:A>G \n", + "2 ACAT2_SA_45_g4 1:12:+:A>G \n", + "3 ACAT2_SA_45_g4 14:25:+:A>G \n", + "4 ACAT2_SA_45_g4 1:12:+:A>G,12:23:+:A>G,15:26:+:A>G \n", + "... ... ... 
\n", + "4921 ACAT2_SA_47_g2 1:10:+:A>G,6:15:+:A>G,18:27:+:A>T,20:29:+:A>G \n", + "4922 ACAT2_SA_47_g2 16:25:+:C>T \n", + "4923 ACAT2_SA_47_g1 -8:0:+:T>-,7:15:+:A>G,8:16:+:A>G,9:17:+:A>G,14... \n", + "4924 ACAT2_SA_47_g1 1:9:+:A>G,6:14:+:A>G,9:17:+:A>G,13:21:+:A>G,17... \n", + "4925 CONTROL_8_g4 -11:0:+:A>G,0:11:+:A>G,4:15:+:A>G,16:27:+:A>G,... \n", + "\n", + " rep5_top rep5_high rep5_bulk rep5_low rep5_bot rep6_top rep6_high \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 2 19 26 13 2 40 203 \n", + "2 30 21 23 73 6 6 18 \n", + "3 4 1 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "4921 0 0 0 0 1 0 0 \n", + "4922 0 0 0 0 1 0 0 \n", + "4923 0 0 0 0 1 0 0 \n", + "4924 0 0 0 0 1 0 0 \n", + "4925 0 0 0 0 1 0 0 \n", + "\n", + " rep6_bulk rep6_low rep6_bot \n", + "0 0 0 0 \n", + "1 43 210 67 \n", + "2 0 1 4 \n", + "3 0 0 0 \n", + "4 0 0 0 \n", + "... ... ... ... \n", + "4921 0 0 0 \n", + "4922 0 0 0 \n", + "4923 0 0 0 \n", + "4924 0 0 0 \n", + "4925 0 0 0 \n", + "\n", + "[4926 rows x 12 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
guideallelerep5_toprep5_highrep5_bulkrep5_lowrep5_botrep6_toprep6_highrep6_bulkrep6_lowrep6_bot
0ACAT2_SA_45_g41:12:+:A>T0000000000
1ACAT2_SA_45_g412:23:+:A>G21926132402034321067
2ACAT2_SA_45_g41:12:+:A>G302123736618014
3ACAT2_SA_45_g414:25:+:A>G4100000000
4ACAT2_SA_45_g41:12:+:A>G,12:23:+:A>G,15:26:+:A>G0000000000
.......................................
4921ACAT2_SA_47_g21:10:+:A>G,6:15:+:A>G,18:27:+:A>T,20:29:+:A>G0000100000
4922ACAT2_SA_47_g216:25:+:C>T0000100000
4923ACAT2_SA_47_g1-8:0:+:T>-,7:15:+:A>G,8:16:+:A>G,9:17:+:A>G,14...0000100000
4924ACAT2_SA_47_g11:9:+:A>G,6:14:+:A>G,9:17:+:A>G,13:21:+:A>G,17...0000100000
4925CONTROL_8_g4-11:0:+:A>G,0:11:+:A>G,4:15:+:A>G,16:27:+:A>G,...0000100000
\n", + "

4926 rows × 12 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"bdata\",\n \"rows\": 4926,\n \"fields\": [\n {\n \"column\": \"guide\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 30,\n \"samples\": [\n \"ACAT2_SA_45_g1\",\n \"ACAT2_SA_45_g3\",\n \"ACAT2_SA_47_g4\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"allele\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4705,\n \"samples\": [\n \"-4:7:+:A>G,10:21:+:C>G,11:22:+:A>G\",\n \"-4:7:+:A>G,-2:9:+:A>G,7:18:+:A>G\",\n \"-2:8:+:A>G,0:10:+:A>G,11:21:+:A>G\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_top\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8,\n \"min\": 0,\n \"max\": 180,\n \"num_unique_values\": 68,\n \"samples\": [\n 60,\n 25,\n 21\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 0,\n \"max\": 120,\n \"num_unique_values\": 39,\n \"samples\": [\n 13,\n 24,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_bulk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 0,\n \"max\": 214,\n \"num_unique_values\": 59,\n \"samples\": [\n 0,\n 5,\n 47\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11,\n \"min\": 0,\n \"max\": 309,\n \"num_unique_values\": 84,\n \"samples\": [\n 81,\n 0,\n 19\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_bot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 35,\n \"num_unique_values\": 18,\n \"samples\": [\n 0,\n 2,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_top\",\n 
\"properties\": {\n \"dtype\": \"number\",\n \"std\": 19,\n \"min\": 0,\n \"max\": 1061,\n \"num_unique_values\": 60,\n \"samples\": [\n 0,\n 5,\n 16\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 51,\n \"min\": 0,\n \"max\": 2446,\n \"num_unique_values\": 99,\n \"samples\": [\n 67,\n 241,\n 198\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_bulk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 35,\n \"min\": 0,\n \"max\": 1854,\n \"num_unique_values\": 79,\n \"samples\": [\n 618,\n 0,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 47,\n \"min\": 0,\n \"max\": 2507,\n \"num_unique_values\": 72,\n \"samples\": [\n 6,\n 159,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_bot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 26,\n \"min\": 0,\n \"max\": 1446,\n \"num_unique_values\": 64,\n \"samples\": [\n 47,\n 250,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 8 + } + ], + "source": [ + "bdata.uns[\"allele_counts\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i6YgMBI5CODf" + }, + "source": [ + "Per-variant aggregated count information is stored in `.uns['edit_counts']`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 444 + }, + "id": "STNeyq_GCODf", + "outputId": "3ee17791-8e3e-41d5-88f6-d2ed41564a08" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " guide edit rep5_top rep5_high rep5_bulk rep5_low \\\n", + "0 ACAT2_SA_44_g1 -4:4:+:A>G 154 53 32 235 \n", + "1 ACAT2_SA_44_g1 11:19:+:A>G 14 44 63 161 \n", + "2 ACAT2_SA_44_g1 1:9:+:A>G 37 65 68 195 \n", + "3 ACAT2_SA_44_g1 22:30:+:A>G 11 19 33 167 \n", + "4 ACAT2_SA_44_g1 6:14:+:C>A 0 0 0 0 \n", + "... ... ... ... ... ... ... \n", + "1761 ACAT2_SA_46_g5 0:12:+:T>C 0 0 0 1 \n", + "1762 ACAT2_SA_47_g2 -9:0:+:C>- 0 0 0 1 \n", + "1763 ACAT2_SA_47_g2 22:31:+:->A 0 0 0 1 \n", + "1764 CONTROL_8_g1 7:15:+:G>A 0 0 0 1 \n", + "1765 ACAT2_SA_47_g2 16:25:+:C>T 0 0 0 0 \n", + "\n", + " rep5_bot rep6_top rep6_high rep6_bulk rep6_low rep6_bot \n", + "0 10 85 167 232 219 79 \n", + "1 6 77 88 51 29 72 \n", + "2 18 21 42 63 50 23 \n", + "3 3 0 0 0 1 18 \n", + "4 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... \n", + "1761 0 0 0 0 0 0 \n", + "1762 0 0 0 0 0 0 \n", + "1763 0 0 0 0 0 0 \n", + "1764 0 0 0 0 0 0 \n", + "1765 1 0 0 0 0 0 \n", + "\n", + "[1766 rows x 12 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
guideeditrep5_toprep5_highrep5_bulkrep5_lowrep5_botrep6_toprep6_highrep6_bulkrep6_lowrep6_bot
0ACAT2_SA_44_g1-4:4:+:A>G1545332235108516723221979
1ACAT2_SA_44_g111:19:+:A>G14446316167788512972
2ACAT2_SA_44_g11:9:+:A>G376568195182142635023
3ACAT2_SA_44_g122:30:+:A>G1119331673000118
4ACAT2_SA_44_g16:14:+:C>A0000000000
.......................................
1761ACAT2_SA_46_g50:12:+:T>C0001000000
1762ACAT2_SA_47_g2-9:0:+:C>-0001000000
1763ACAT2_SA_47_g222:31:+:->A0001000000
1764CONTROL_8_g17:15:+:G>A0001000000
1765ACAT2_SA_47_g216:25:+:C>T0000100000
\n", + "

1766 rows × 12 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"bdata\",\n \"rows\": 1766,\n \"fields\": [\n {\n \"column\": \"guide\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 30,\n \"samples\": [\n \"CONTROL_9_g3\",\n \"ACAT2_SA_47_g1\",\n \"CONTROL_8_g4\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"edit\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1183,\n \"samples\": [\n \"-7:5:+:C>A\",\n \"-4:6:+:G>A\",\n \"17:29:+:T>C\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_top\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 32,\n \"min\": 0,\n \"max\": 356,\n \"num_unique_values\": 101,\n \"samples\": [\n 3,\n 52,\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13,\n \"min\": 0,\n \"max\": 139,\n \"num_unique_values\": 71,\n \"samples\": [\n 10,\n 53,\n 15\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_bulk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24,\n \"min\": 0,\n \"max\": 354,\n \"num_unique_values\": 85,\n \"samples\": [\n 88,\n 32,\n 23\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 46,\n \"min\": 0,\n \"max\": 622,\n \"num_unique_values\": 131,\n \"samples\": [\n 41,\n 194,\n 243\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_bot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 0,\n \"max\": 54,\n \"num_unique_values\": 38,\n \"samples\": [\n 4,\n 19,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_top\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 46,\n 
\"min\": 0,\n \"max\": 1295,\n \"num_unique_values\": 87,\n \"samples\": [\n 136,\n 85,\n 15\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 151,\n \"min\": 0,\n \"max\": 2927,\n \"num_unique_values\": 119,\n \"samples\": [\n 3,\n 60,\n 1381\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_bulk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 100,\n \"min\": 0,\n \"max\": 2411,\n \"num_unique_values\": 101,\n \"samples\": [\n 104,\n 46,\n 414\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117,\n \"min\": 0,\n \"max\": 3100,\n \"num_unique_values\": 94,\n \"samples\": [\n 916,\n 73,\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_bot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 62,\n \"min\": 0,\n \"max\": 1563,\n \"num_unique_values\": 92,\n \"samples\": [\n 24,\n 67,\n 69\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 9 + } + ], + "source": [ + "bdata.uns[\"edit_counts\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hv1DMzijCODf" + }, + "source": [ + "## Changing column names" ] }, { "cell_type": "markdown", "metadata": { - "id": "qETZ0KgbbJIe" + "id": "8VFtHH4ECODf" + }, + "source": [ + "`ReporterScreen.guides` and `ReporterScreen.samples` are references to `AnnData.obs` and `AnnData.var`, which are Pandas DataFrames and can be [manipulated like any other DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html). 
For example, column names can be changed as in Pandas:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "mu-7UmyCCODf" }, + "outputs": [], "source": [ - "# CRISPR Reporter Screen analysis with bean" + "bdata.guides = bdata.guides.rename(columns={\"Reporter\":\"reporter\"})" ] }, { "cell_type": "markdown", "metadata": { - "id": "GQv2gC9IbOak" + "id": "A_1ERHsEbD4R" }, "source": [ - "In this tutorial, we will cover\n", - "* Obtaining target variant editing rate from allele count information\n", - "* ReporterScreen object slicing (indexing)\n", - "* Allele filtering based on significance\n", - "\n", - "For the basic API of ReporterScreen object, see [**perturb-tools tutorial of Screen API**](https://github.com/pinellolab/perturb-tools/blob/main/perturb_tools/screen_demo.ipynb)." + "## Subsetting & addition\n", + "\n" ] }, { - "cell_type": "code", - "execution_count": 2, + "cell_type": "markdown", "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "b_HqduZRy3DY", - "outputId": "eec31693-93b0-4266-8c30-a9902e7da4cd" + "id": "WCCNOXtQbD4S" + }, + "source": [ + "Works as anndata, supports allele & edit count operations.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wLQd6h_ubD4S" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting beans==0.1.2\n", - " Downloading beans-0.1.2.tar.gz (464 kB)\n", - "\u001b[K |████████████████████████████████| 464 kB 4.1 MB/s \n", - "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", - " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", - " Preparing wheel metadata ... 
\u001b[?25l\u001b[?25hdone\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from beans==0.1.2) (1.21.6)\n", - "Collecting perturb-tools>=0.0.16\n", - " Downloading perturb-tools-0.1.4.tar.gz (147 kB)\n", - "\u001b[K |████████████████████████████████| 147 kB 51.2 MB/s \n", - "\u001b[?25hCollecting matplotlib>=3.4\n", - " Downloading matplotlib-3.5.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.2 MB)\n", - "\u001b[K |████████████████████████████████| 11.2 MB 51.2 MB/s \n", - "\u001b[?25hCollecting anndata>=0.7.1\n", - " Downloading anndata-0.8.0-py3-none-any.whl (96 kB)\n", - "\u001b[K |████████████████████████████████| 96 kB 4.9 MB/s \n", - "\u001b[?25hRequirement already satisfied: pandas>=1.1.2 in /usr/local/lib/python3.7/dist-packages (from perturb-tools>=0.0.16->beans==0.1.2) (1.3.5)\n", - "Collecting biopython>=1.79\n", - " Downloading biopython-1.79-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (2.3 MB)\n", - "\u001b[K |████████████████████████████████| 2.3 MB 57.1 MB/s \n", - "\u001b[?25hCollecting pool-sharq>=0.0.12\n", - " Downloading pool_sharq-0.0.12-py3-none-any.whl (7.8 kB)\n", - "Requirement already satisfied: plotly in /usr/local/lib/python3.7/dist-packages (from perturb-tools>=0.0.16->beans==0.1.2) (5.5.0)\n", - "Requirement already satisfied: regex in /usr/local/lib/python3.7/dist-packages (from perturb-tools>=0.0.16->beans==0.1.2) (2022.6.2)\n", - "Requirement already satisfied: typing_extensions in /usr/local/lib/python3.7/dist-packages (from anndata>=0.7.1->perturb-tools>=0.0.16->beans==0.1.2) (4.1.1)\n", - "Requirement already satisfied: importlib_metadata>=0.7 in /usr/local/lib/python3.7/dist-packages (from anndata>=0.7.1->perturb-tools>=0.0.16->beans==0.1.2) (4.12.0)\n", - "Requirement already satisfied: packaging>=20 in /usr/local/lib/python3.7/dist-packages (from anndata>=0.7.1->perturb-tools>=0.0.16->beans==0.1.2) (21.3)\n", - "Requirement already satisfied: scipy>1.4 in 
/usr/local/lib/python3.7/dist-packages (from anndata>=0.7.1->perturb-tools>=0.0.16->beans==0.1.2) (1.7.3)\n", - "Requirement already satisfied: natsort in /usr/local/lib/python3.7/dist-packages (from anndata>=0.7.1->perturb-tools>=0.0.16->beans==0.1.2) (5.5.0)\n", - "Requirement already satisfied: h5py>=3 in /usr/local/lib/python3.7/dist-packages (from anndata>=0.7.1->perturb-tools>=0.0.16->beans==0.1.2) (3.1.0)\n", - "Requirement already satisfied: cached-property in /usr/local/lib/python3.7/dist-packages (from h5py>=3->anndata>=0.7.1->perturb-tools>=0.0.16->beans==0.1.2) (1.5.2)\n", - "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib_metadata>=0.7->anndata>=0.7.1->perturb-tools>=0.0.16->beans==0.1.2) (3.8.1)\n", - "Collecting fonttools>=4.22.0\n", - " Downloading fonttools-4.37.3-py3-none-any.whl (959 kB)\n", - "\u001b[K |████████████████████████████████| 959 kB 44.2 MB/s \n", - "\u001b[?25hRequirement already satisfied: pyparsing>=2.2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.4->perturb-tools>=0.0.16->beans==0.1.2) (3.0.9)\n", - "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.4->perturb-tools>=0.0.16->beans==0.1.2) (1.4.4)\n", - "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.4->perturb-tools>=0.0.16->beans==0.1.2) (0.11.0)\n", - "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.4->perturb-tools>=0.0.16->beans==0.1.2) (7.1.2)\n", - "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.7/dist-packages (from matplotlib>=3.4->perturb-tools>=0.0.16->beans==0.1.2) (2.8.2)\n", - "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas>=1.1.2->perturb-tools>=0.0.16->beans==0.1.2) (2022.2.1)\n", - "Collecting licorice>=0.0.2\n", - " Downloading 
licorice-0.0.3-py3-none-any.whl (8.1 kB)\n", - "Collecting requests>=2.26.0\n", - " Downloading requests-2.28.1-py3-none-any.whl (62 kB)\n", - "\u001b[K |████████████████████████████████| 62 kB 1.4 MB/s \n", - "\u001b[?25hRequirement already satisfied: tqdm>=4.62.3 in /usr/local/lib/python3.7/dist-packages (from pool-sharq>=0.0.12->perturb-tools>=0.0.16->beans==0.1.2) (4.64.1)\n", - "Collecting pyrequisites>=0.0.2\n", - " Downloading pyrequisites-0.0.2-py3-none-any.whl (4.1 kB)\n", - "Collecting beautifulsoup4>=4.10.0\n", - " Downloading beautifulsoup4-4.11.1-py3-none-any.whl (128 kB)\n", - "\u001b[K |████████████████████████████████| 128 kB 66.4 MB/s \n", - "\u001b[?25hCollecting soupsieve>1.2\n", - " Downloading soupsieve-2.3.2.post1-py3-none-any.whl (37 kB)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7->matplotlib>=3.4->perturb-tools>=0.0.16->beans==0.1.2) (1.15.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26.0->pool-sharq>=0.0.12->perturb-tools>=0.0.16->beans==0.1.2) (2.10)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26.0->pool-sharq>=0.0.12->perturb-tools>=0.0.16->beans==0.1.2) (1.24.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26.0->pool-sharq>=0.0.12->perturb-tools>=0.0.16->beans==0.1.2) (2022.6.15)\n", - "Requirement already satisfied: charset-normalizer<3,>=2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.26.0->pool-sharq>=0.0.12->perturb-tools>=0.0.16->beans==0.1.2) (2.1.1)\n", - "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.7/dist-packages (from plotly->perturb-tools>=0.0.16->beans==0.1.2) (8.0.1)\n", - "Building wheels for collected packages: beans, perturb-tools\n", - " Building wheel for beans (PEP 517) ... 
\u001b[?25l\u001b[?25hdone\n", - " Created wheel for beans: filename=beans-0.1.2-cp37-cp37m-linux_x86_64.whl size=506725 sha256=43eb6b2b41a112f7ea2d37a17e1e9bd91dffd4599710f3958769f84e59b3e6b8\n", - " Stored in directory: /root/.cache/pip/wheels/68/81/58/49b78bc024e2a5312dc830797a25c15c7acda4225b5b25d232\n", - " Building wheel for perturb-tools (setup.py) ... \u001b[?25l\u001b[?25hdone\n", - " Created wheel for perturb-tools: filename=perturb_tools-0.1.4-py3-none-any.whl size=54209 sha256=e383c22165c92cf7c22605aec1da8689f60d55d1c1ea530a925aa89e5ee6608d\n", - " Stored in directory: /root/.cache/pip/wheels/35/75/47/47a3d25b1958a24d7d3ee488b8956a9f43bda0bb0761b4d67c\n", - "Successfully built beans perturb-tools\n", - "Installing collected packages: soupsieve, licorice, requests, pyrequisites, fonttools, beautifulsoup4, pool-sharq, matplotlib, biopython, anndata, perturb-tools, beans\n", - " Attempting uninstall: requests\n", - " Found existing installation: requests 2.23.0\n", - " Uninstalling requests-2.23.0:\n", - " Successfully uninstalled requests-2.23.0\n", - " Attempting uninstall: beautifulsoup4\n", - " Found existing installation: beautifulsoup4 4.6.3\n", - " Uninstalling beautifulsoup4-4.6.3:\n", - " Successfully uninstalled beautifulsoup4-4.6.3\n", - " Attempting uninstall: matplotlib\n", - " Found existing installation: matplotlib 3.2.2\n", - " Uninstalling matplotlib-3.2.2:\n", - " Successfully uninstalled matplotlib-3.2.2\n", - "Successfully installed anndata-0.8.0 beautifulsoup4-4.11.1 beans-0.1.2 biopython-1.79 fonttools-4.37.3 licorice-0.0.3 matplotlib-3.5.3 perturb-tools-0.1.4 pool-sharq-0.0.12 pyrequisites-0.0.2 requests-2.28.1 soupsieve-2.3.2.post1\n" - ] - }, - { - "data": { - "application/vnd.colab-display-data+json": { - "pip_warning": { - "packages": [ - "matplotlib", - "mpl_toolkits" - ] - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ - "! 
pip install beans==0.1.2" + "### Subsetting & selection\n" ] }, + { + "cell_type": "markdown", + "source": [ + "ReporterScreen can be subsetted for rows (guides) / selected for columns (samples) [as in AnnData](https://anndata.readthedocs.io/en/latest/tutorials/notebooks/getting-started.html#Subsetting-AnnData)." + ], + "metadata": { + "id": "4y95aTXRF6LG" + } + }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 11, "metadata": { - "id": "q4U4twaAzLPs" + "id": "DDVh1WtrbD4T" }, "outputs": [], "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "import seaborn as sns\n", - "import matplotlib.pyplot as plt\n", - "import bean as be" + "bdata_subset = bdata[:10,bdata.samples.condition == \"bulk\"]" ] }, { "cell_type": "code", - "execution_count": 33, + "source": [ + "bdata_subset" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, - "id": "8KhlwSn_2x9P", - "outputId": "1fc11a18-6e01-403d-9e08-73da37eee332" + "id": "iXyQLIqlF25z", + "outputId": "c0fcaebc-b785-42ec-cf68-6fc1a902bc39" }, + "execution_count": 12, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading...\n", - "From: https://drive.google.com/uc?id=18Azb8YmmMvFZo9urc2TxZr540xXWOv_v\n", - "To: /content/bean_count_072121_ABE_topbot_LDLvar.h5ad\n", - "100% 40.2M/40.2M [00:00<00:00, 160MB/s]\n" - ] + "output_type": "execute_result", + "data": { + "text/plain": [ + "Genome Editing Screen comprised of n_guides x n_conditions = 10 x 2\n", + " guides: 'Unnamed: 0', 'Target gene/variant', 'Target descriptor', 'Arbitrary number', 'gRNA position category', 'Target base position in gRNA', 'Target base position in reporter', 'BE', 'target_group', 'sequence', 'reporter', 'barcode', '5-nt PAM', 'offset', 'target', 'target_pos', 'Group2', 'masked_sequence', 'masked_barcode', 'chrom', 'genomic_pos'\n", + " samples: 'condition', 'replicate', 'lower_quantile', 'upper_quantile'\n", + " samples_m: \n", + " samples_p: \n", + " layers: 
'X_bcmatch', 'edits'\n", + " uns: 'allele_counts', 'edit_counts', 'target_base_changes', 'tiling'" + ] + }, + "metadata": {}, + "execution_count": 12 } - ], - "source": [ - "!gdown 18Azb8YmmMvFZo9urc2TxZr540xXWOv_v" ] }, { - "cell_type": "code", - "execution_count": 2, + "cell_type": "markdown", "metadata": { - "id": "yBRxkKbLbD4F" + "id": "6wzebZzsbD4a" }, - "outputs": [], "source": [ - "bdata = be.read_h5ad(\"bean_count_072121_ABE_topbot_LDLvar.h5ad\")" + "## Getting edit rates from allele counts\n", + "\n" ] }, + { + "cell_type": "markdown", + "source": [ + "`ReporterScreen.uns[\"edit_counts\"]` is derived from `ReporterScreen.uns[\"allele_counts\"]`, then used to generate per-guide or per-window editing rate." + ], + "metadata": { + "id": "qKEu9Jb_GZxZ" + } + }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 488 + "height": 444 }, - "id": "4Q6jF9DgbD4K", - "outputId": "7abf9b29-2755-456e-fd62-5cb69a4fc27c" + "id": "GMV9enV35HKu", + "outputId": "6f36f0a6-e30d-4822-d261-5ca99a08fca2" }, "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " guide allele \\\n", + "0 ACAT2_SA_45_g4 1:12:+:A>T \n", + "1 ACAT2_SA_45_g4 12:23:+:A>G \n", + "2 ACAT2_SA_45_g4 1:12:+:A>G \n", + "3 ACAT2_SA_45_g4 14:25:+:A>G \n", + "4 ACAT2_SA_45_g4 1:12:+:A>G,12:23:+:A>G,15:26:+:A>G \n", + "... ... ... \n", + "4921 ACAT2_SA_47_g2 1:10:+:A>G,6:15:+:A>G,18:27:+:A>T,20:29:+:A>G \n", + "4922 ACAT2_SA_47_g2 16:25:+:C>T \n", + "4923 ACAT2_SA_47_g1 -8:0:+:T>-,7:15:+:A>G,8:16:+:A>G,9:17:+:A>G,14... \n", + "4924 ACAT2_SA_47_g1 1:9:+:A>G,6:14:+:A>G,9:17:+:A>G,13:21:+:A>G,17... \n", + "4925 CONTROL_8_g4 -11:0:+:A>G,0:11:+:A>G,4:15:+:A>G,16:27:+:A>G,... 
\n", + "\n", + " rep5_top rep5_high rep5_bulk rep5_low rep5_bot rep6_top rep6_high \\\n", + "0 0 0 0 0 0 0 0 \n", + "1 2 19 26 13 2 40 203 \n", + "2 30 21 23 73 6 6 18 \n", + "3 4 1 0 0 0 0 0 \n", + "4 0 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "4921 0 0 0 0 1 0 0 \n", + "4922 0 0 0 0 1 0 0 \n", + "4923 0 0 0 0 1 0 0 \n", + "4924 0 0 0 0 1 0 0 \n", + "4925 0 0 0 0 1 0 0 \n", + "\n", + " rep6_bulk rep6_low rep6_bot \n", + "0 0 0 0 \n", + "1 43 210 67 \n", + "2 0 1 4 \n", + "3 0 0 0 \n", + "4 0 0 0 \n", + "... ... ... ... \n", + "4921 0 0 0 \n", + "4922 0 0 0 \n", + "4923 0 0 0 \n", + "4924 0 0 0 \n", + "4925 0 0 0 \n", + "\n", + "[4926 rows x 12 columns]" + ], "text/html": [ "\n", - "
\n", - "
\n", - "
\n", + "
\n", + "
\n", "\n", "\n", - " \n", - "
\n", + " \n", "
\n", - " " + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "text/plain": [ - " guide \\\n", - "0 LDLR_SA_3_g5 \n", - "1 LDLR_SA_3_g5 \n", - "2 LDLR_SA_3_g5 \n", - "3 LDLR_SA_3_g5 \n", - "4 LDLR_SA_3_g5 \n", - "... ... \n", - "165506 2:164588224GAG_Maj_ABE_10_g3 \n", - "165507 2:164588224GAG_Maj_ABE_10_g3 \n", - "165508 rs4921914_Min_ABE_501_g4 \n", - "165509 rs191388787_Maj_ABE_121_g2 \n", - "165510 rs113408797_Maj_ABE_41_g1 \n", - "\n", - " allele rep1_bot rep2_bot \\\n", - "0 26 13 \n", - "1 0:13:+:A>G 6 16 \n", - "2 -12:1:+:A>G,0:13:+:A>G 2 16 \n", - "3 0:13:+:A>G,7:20:+:A>G 2 0 \n", - "4 -12:1:+:A>G,-8:5:+:A>G,0:13:+:A>G,10:23:+:A>G 1 0 \n", - "... ... ... ... \n", - "165506 -10:1:+:C>T,-8:3:+:A>T 0 0 \n", - "165507 -8:3:+:A>T,-3:8:+:C>A,-1:10:+:G>A 0 0 \n", - "165508 -9:3:+:A>G,-8:4:+:A>G,-3:9:+:A>G,2:14:+:A>G,4:... 0 0 \n", - "165509 -7:3:+:A>G,-5:5:+:A>G,-3:7:+:A>G 0 0 \n", - "165510 -8:1:+:A>G,-1:8:+:A>G,3:12:+:A>G,5:14:+:A>G,17... 0 0 \n", - "\n", - " rep3_VPA_bot rep4_VPA_bot rep1_bulk rep2_bulk rep3_VPA_bulk \\\n", - "0 9 13 60 21 27 \n", - "1 11 24 29 21 28 \n", - "2 1 24 16 5 9 \n", - "3 0 0 0 0 0 \n", - "4 0 0 0 0 0 \n", - "... ... ... ... ... ... \n", - "165506 0 0 0 0 0 \n", - "165507 0 0 0 0 0 \n", - "165508 0 0 0 0 0 \n", - "165509 0 0 0 0 0 \n", - "165510 0 0 0 0 0 \n", - "\n", - " rep4_VPA_bulk rep1_top rep2_top rep3_VPA_top rep4_VPA_top \n", - "0 31 32 26 43 43 \n", - "1 22 27 11 20 13 \n", - "2 4 17 12 7 3 \n", - "3 9 0 0 0 0 \n", - "4 0 0 0 0 0 \n", - "... ... ... ... ... ... 
\n", - "165506 0 0 0 0 1 \n", - "165507 0 0 0 0 1 \n", - "165508 0 0 0 0 1 \n", - "165509 0 0 0 0 1 \n", - "165510 0 0 0 0 1 \n", - "\n", - "[165511 rows x 14 columns]" - ] + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"bdata\",\n \"rows\": 4926,\n \"fields\": [\n {\n \"column\": \"guide\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 30,\n \"samples\": [\n \"ACAT2_SA_45_g1\",\n \"ACAT2_SA_45_g3\",\n \"ACAT2_SA_47_g4\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"allele\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4705,\n \"samples\": [\n \"-4:7:+:A>G,10:21:+:C>G,11:22:+:A>G\",\n \"-4:7:+:A>G,-2:9:+:A>G,7:18:+:A>G\",\n \"-2:8:+:A>G,0:10:+:A>G,11:21:+:A>G\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_top\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8,\n \"min\": 0,\n \"max\": 180,\n \"num_unique_values\": 68,\n \"samples\": [\n 60,\n 25,\n 21\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 0,\n \"max\": 120,\n \"num_unique_values\": 39,\n \"samples\": [\n 13,\n 24,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_bulk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6,\n \"min\": 0,\n \"max\": 214,\n \"num_unique_values\": 59,\n \"samples\": [\n 0,\n 5,\n 47\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11,\n \"min\": 0,\n \"max\": 309,\n \"num_unique_values\": 84,\n \"samples\": [\n 81,\n 0,\n 19\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_bot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 
35,\n \"num_unique_values\": 18,\n \"samples\": [\n 0,\n 2,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_top\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 19,\n \"min\": 0,\n \"max\": 1061,\n \"num_unique_values\": 60,\n \"samples\": [\n 0,\n 5,\n 16\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 51,\n \"min\": 0,\n \"max\": 2446,\n \"num_unique_values\": 99,\n \"samples\": [\n 67,\n 241,\n 198\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_bulk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 35,\n \"min\": 0,\n \"max\": 1854,\n \"num_unique_values\": 79,\n \"samples\": [\n 618,\n 0,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 47,\n \"min\": 0,\n \"max\": 2507,\n \"num_unique_values\": 72,\n \"samples\": [\n 6,\n 159,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_bot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 26,\n \"min\": 0,\n \"max\": 1446,\n \"num_unique_values\": 64,\n \"samples\": [\n 47,\n 250,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } }, - "execution_count": 35, "metadata": {}, - "output_type": "execute_result" + "execution_count": 13 } ], "source": [ - "bdata.uns[\"allele_counts\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A_1ERHsEbD4R" - }, - "source": [ - "## Subsetting & addition\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WCCNOXtQbD4S" - }, - "source": [ - "Works as anndata, supports allele & edit count operations.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wLQd6h_ubD4S" - }, - "source": [ - "### Subsetting & selection\n" - ] - }, - { - 
"cell_type": "code", - "execution_count": 3, - "metadata": { - "id": "jaUnqqXt3G2P" - }, - "outputs": [], - "source": [ - "bdata.samples[\"replicate\"], bdata.samples[\"sort\"] = zip(*bdata.samples.index.map(lambda s: s.rsplit(\"_\", 1)))" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": { - "id": "DDVh1WtrbD4T" - }, - "outputs": [], - "source": [ - "bdata_subset = bdata[:10,bdata.samples.sort == \"bulk\"]" + "bdata.uns['allele_counts']" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 14, "metadata": { - "id": "Z_s5M7L1bD4U" + "id": "pEX2eOem4uka" }, "outputs": [], "source": [ - "bdata.uns[\"allele_counts\"] = bdata.uns['allele_counts'].loc[bdata.uns['allele_counts'].allele.map(str) != \"\"]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6wzebZzsbD4a" - }, - "source": [ - "## Getting edit rates from allele counts\n", - "\n" + "bdata.get_edit_from_allele()" ] }, { "cell_type": "code", - "execution_count": 42, + "source": [ + "bdata.uns[\"edit_counts\"]" + ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 488 + "height": 444 }, - "id": "GMV9enV35HKu", - "outputId": "e8259a37-98f1-408c-9e16-d5315c2ce06c" + "id": "l-QOC0pXGzZ-", + "outputId": "7028aaf1-cd78-4f17-f585-b872297ce000" }, + "execution_count": 15, "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " guide edit rep5_top rep5_high rep5_bulk rep5_low \\\n", + "0 ACAT2_SA_44_g1 -2:6:+:T>A 0 0 0 0 \n", + "1 ACAT2_SA_44_g1 -2:6:+:T>C 0 0 0 0 \n", + "2 ACAT2_SA_44_g1 -3:5:+:T>A 1 0 0 0 \n", + "3 ACAT2_SA_44_g1 -3:5:+:T>C 0 0 0 0 \n", + "4 ACAT2_SA_44_g1 -3:5:+:T>G 0 0 0 0 \n", + "... ... ... ... ... ... ... 
\n", + "1761 CONTROL_9_g5 7:19:+:A>T 0 0 0 0 \n", + "1762 CONTROL_9_g5 8:20:+:C>A 0 0 0 0 \n", + "1763 CONTROL_9_g5 8:20:+:C>T 0 0 0 0 \n", + "1764 CONTROL_9_g5 9:21:+:C>- 0 0 0 0 \n", + "1765 CONTROL_9_g5 9:21:+:C>A 0 0 0 0 \n", + "\n", + " rep5_bot rep6_top rep6_high rep6_bulk rep6_low rep6_bot \n", + "0 0 0 0 0 0 0 \n", + "1 0 0 0 0 0 0 \n", + "2 0 0 0 0 0 0 \n", + "3 0 0 0 0 1 0 \n", + "4 0 0 1 0 0 0 \n", + "... ... ... ... ... ... ... \n", + "1761 0 0 1 0 0 0 \n", + "1762 0 0 0 0 0 0 \n", + "1763 0 0 1 0 0 0 \n", + "1764 0 0 0 0 0 0 \n", + "1765 0 0 0 0 0 0 \n", + "\n", + "[1766 rows x 12 columns]" + ], "text/html": [ "\n", - "
\n", - "
\n", - "
\n", + "
\n", + "
\n", "\n", "\n", - " \n", - "
\n", + " \n", "
\n", - " " + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "text/plain": [ - " guide \\\n", - "1 LDLR_SA_3_g5 \n", - "2 LDLR_SA_3_g5 \n", - "3 LDLR_SA_3_g5 \n", - "4 LDLR_SA_3_g5 \n", - "5 LDLR_SA_3_g5 \n", - "... ... \n", - "165506 2:164588224GAG_Maj_ABE_10_g3 \n", - "165507 2:164588224GAG_Maj_ABE_10_g3 \n", - "165508 rs4921914_Min_ABE_501_g4 \n", - "165509 rs191388787_Maj_ABE_121_g2 \n", - "165510 rs113408797_Maj_ABE_41_g1 \n", - "\n", - " allele rep1_bot rep2_bot \\\n", - "1 0:13:+:A>G 6 16 \n", - "2 -12:1:+:A>G,0:13:+:A>G 2 16 \n", - "3 0:13:+:A>G,7:20:+:A>G 2 0 \n", - "4 -12:1:+:A>G,-8:5:+:A>G,0:13:+:A>G,10:23:+:A>G 1 0 \n", - "5 -12:1:+:A>G,0:13:+:A>G,7:20:+:A>G 1 0 \n", - "... ... ... ... \n", - "165506 -10:1:+:C>T,-8:3:+:A>T 0 0 \n", - "165507 -8:3:+:A>T,-3:8:+:C>A,-1:10:+:G>A 0 0 \n", - "165508 -9:3:+:A>G,-8:4:+:A>G,-3:9:+:A>G,2:14:+:A>G,4:... 0 0 \n", - "165509 -7:3:+:A>G,-5:5:+:A>G,-3:7:+:A>G 0 0 \n", - "165510 -8:1:+:A>G,-1:8:+:A>G,3:12:+:A>G,5:14:+:A>G,17... 0 0 \n", - "\n", - " rep3_VPA_bot rep4_VPA_bot rep1_bulk rep2_bulk rep3_VPA_bulk \\\n", - "1 11 24 29 21 28 \n", - "2 1 24 16 5 9 \n", - "3 0 0 0 0 0 \n", - "4 0 0 0 0 0 \n", - "5 0 0 0 2 2 \n", - "... ... ... ... ... ... \n", - "165506 0 0 0 0 0 \n", - "165507 0 0 0 0 0 \n", - "165508 0 0 0 0 0 \n", - "165509 0 0 0 0 0 \n", - "165510 0 0 0 0 0 \n", - "\n", - " rep4_VPA_bulk rep1_top rep2_top rep3_VPA_top rep4_VPA_top \n", - "1 22 27 11 20 13 \n", - "2 4 17 12 7 3 \n", - "3 9 0 0 0 0 \n", - "4 0 0 0 0 0 \n", - "5 0 1 0 0 0 \n", - "... ... ... ... ... ... 
\n", - "165506 0 0 0 0 1 \n", - "165507 0 0 0 0 1 \n", - "165508 0 0 0 0 1 \n", - "165509 0 0 0 0 1 \n", - "165510 0 0 0 0 1 \n", - "\n", - "[162065 rows x 14 columns]" - ] + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"bdata\",\n \"rows\": 1766,\n \"fields\": [\n {\n \"column\": \"guide\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 30,\n \"samples\": [\n \"CONTROL_9_g3\",\n \"ACAT2_SA_47_g1\",\n \"CONTROL_8_g4\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"edit\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 1183,\n \"samples\": [\n \"13:22:+:A>G\",\n \"-10:1:+:G>C\",\n \"7:18:+:G>T\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_top\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 32,\n \"min\": 0,\n \"max\": 356,\n \"num_unique_values\": 101,\n \"samples\": [\n 5,\n 13,\n 43\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 13,\n \"min\": 0,\n \"max\": 139,\n \"num_unique_values\": 71,\n \"samples\": [\n 13,\n 0,\n 46\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_bulk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24,\n \"min\": 0,\n \"max\": 354,\n \"num_unique_values\": 85,\n \"samples\": [\n 92,\n 0,\n 35\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 46,\n \"min\": 0,\n \"max\": 622,\n \"num_unique_values\": 131,\n \"samples\": [\n 127,\n 285,\n 68\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep5_bot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 0,\n \"max\": 54,\n \"num_unique_values\": 38,\n \"samples\": [\n 4,\n 19,\n 
18\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_top\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 46,\n \"min\": 0,\n \"max\": 1295,\n \"num_unique_values\": 87,\n \"samples\": [\n 67,\n 0,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_high\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 151,\n \"min\": 0,\n \"max\": 2927,\n \"num_unique_values\": 119,\n \"samples\": [\n 62,\n 838,\n 42\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_bulk\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 100,\n \"min\": 0,\n \"max\": 2411,\n \"num_unique_values\": 101,\n \"samples\": [\n 222,\n 198,\n 231\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 117,\n \"min\": 0,\n \"max\": 3100,\n \"num_unique_values\": 94,\n \"samples\": [\n 23,\n 77,\n 126\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rep6_bot\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 62,\n \"min\": 0,\n \"max\": 1563,\n \"num_unique_values\": 92,\n \"samples\": [\n 97,\n 197,\n 48\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } }, - "execution_count": 42, "metadata": {}, - "output_type": "execute_result" + "execution_count": 15 } - ], - "source": [ - "bdata.uns['allele_counts']" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "pEX2eOem4uka" - }, - "outputs": [], - "source": [ - "bdata.uns[\"edit_counts\"] = bdata.get_edit_from_allele()" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FwKNCaIm7JSH", - "outputId": "f874adbf-6286-46a5-8b6b-5f80a204da58" + "outputId": "a5011cfa-9f61-43c8-9b20-4b684648ed68" }, "outputs": [ { - "name": 
"stdout", "output_type": "stream", + "name": "stdout", "text": [ "New edit matrix saved in .layers['edits']. Returning old edits.\n" ] }, { + "output_type": "execute_result", "data": { "text/plain": [ - "array([[0., 0., 0., ..., 0., 0., 0.],\n", - " [0., 0., 0., ..., 0., 0., 0.],\n", - " [0., 0., 0., ..., 0., 0., 0.],\n", - " ...,\n", - " [0., 0., 0., ..., 0., 0., 0.],\n", - " [0., 0., 0., ..., 0., 0., 0.],\n", - " [0., 0., 0., ..., 0., 0., 0.]])" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "bdata.get_edit_mat_from_uns(\"A\", \"G\", target_pos_col = \"target_pos\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "id": "aJXnxwbb4F3G" - }, - "outputs": [], - "source": [ - "window_edit_rate= bdata.get_guide_edit_rate(normalize_by_editable_base = True,\n", - " edited_base = \"A\",\n", - " editable_base_start = 3,\n", - " editable_base_end = 8,\n", - " bcmatch_thres = 5,\n", - " prior_weight = 1,\n", - " return_result = True)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 490 - }, - "id": "dgM4MptLbD4c", - "outputId": "94f97a73-0e8e-4ffc-9833-8634053c3f42" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(array([579., 364., 299., 249., 221., 214., 191., 134., 159., 133., 97.,\n", - " 90., 83., 76., 70., 67., 48., 53., 45., 56., 38., 37.,\n", - " 32., 21., 24., 15., 8., 7., 1., 2.]),\n", - " array([4.13052458e-04, 2.89016995e-02, 5.73903465e-02, 8.58789936e-02,\n", - " 1.14367641e-01, 1.42856288e-01, 1.71344935e-01, 1.99833582e-01,\n", - " 2.28322229e-01, 2.56810876e-01, 2.85299523e-01, 3.13788170e-01,\n", - " 3.42276817e-01, 3.70765464e-01, 3.99254111e-01, 4.27742758e-01,\n", - " 4.56231405e-01, 4.84720052e-01, 5.13208699e-01, 5.41697346e-01,\n", - " 5.70185993e-01, 5.98674640e-01, 6.27163287e-01, 6.55651934e-01,\n", - " 6.84140582e-01, 7.12629229e-01, 7.41117876e-01, 
7.69606523e-01,\n", - " 7.98095170e-01, 8.26583817e-01, 8.55072464e-01]),\n", - " )" + "array([[1.900e+01, 2.500e+01, 6.200e+01, 8.400e+01, 7.000e+00, 1.500e+01,\n", + " 1.120e+02, 1.190e+02, 9.200e+01, 4.300e+01],\n", + " [8.000e+00, 2.000e+00, 0.000e+00, 4.700e+01, 5.000e+00, 1.900e+01,\n", + " 2.410e+02, 1.800e+01, 3.100e+01, 4.800e+01],\n", + " [3.700e+01, 2.000e+01, 3.500e+01, 6.300e+01, 1.000e+00, 2.470e+02,\n", + " 8.260e+02, 4.150e+02, 5.800e+02, 5.040e+02],\n", + " [2.000e+00, 2.500e+01, 4.300e+01, 6.200e+01, 8.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00],\n", + " [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00],\n", + " [3.000e+00, 3.100e+01, 3.000e+01, 1.470e+02, 1.200e+01, 0.000e+00,\n", + " 6.000e+01, 1.100e+01, 4.000e+00, 2.000e+00],\n", + " [1.700e+02, 8.200e+01, 7.400e+01, 2.080e+02, 1.000e+01, 2.140e+02,\n", + " 9.500e+02, 3.960e+02, 4.610e+02, 1.620e+02],\n", + " [1.400e+01, 4.200e+01, 2.200e+01, 4.900e+01, 1.700e+01, 0.000e+00,\n", + " 7.000e+01, 0.000e+00, 0.000e+00, 0.000e+00],\n", + " [1.090e+02, 4.900e+01, 4.500e+01, 9.100e+01, 6.000e+00, 5.000e+00,\n", + " 2.000e+00, 3.000e+00, 2.200e+01, 0.000e+00],\n", + " [1.000e+00, 2.400e+01, 5.000e+00, 5.000e+00, 6.000e+00, 4.900e+01,\n", + " 3.560e+02, 1.530e+02, 3.090e+02, 7.400e+01],\n", + " [3.700e+01, 6.500e+01, 6.800e+01, 1.950e+02, 1.800e+01, 2.100e+01,\n", + " 4.200e+01, 6.300e+01, 5.000e+01, 2.300e+01],\n", + " [3.560e+02, 1.170e+02, 3.190e+02, 5.240e+02, 5.400e+01, 2.770e+02,\n", + " 1.447e+03, 1.066e+03, 4.310e+02, 2.210e+02],\n", + " [2.980e+02, 1.170e+02, 1.290e+02, 2.160e+02, 4.700e+01, 2.560e+02,\n", + " 1.093e+03, 5.270e+02, 1.306e+03, 5.160e+02],\n", + " [8.700e+01, 3.700e+01, 1.200e+02, 2.040e+02, 1.200e+01, 8.900e+01,\n", + " 5.960e+02, 1.100e+02, 8.400e+01, 7.700e+01],\n", + " [2.320e+02, 2.300e+01, 1.040e+02, 2.430e+02, 9.000e+00, 3.610e+02,\n", + " 9.270e+02, 4.930e+02, 2.250e+02, 
1.970e+02],\n", + " [1.120e+02, 3.700e+01, 6.100e+01, 1.630e+02, 1.700e+01, 3.800e+01,\n", + " 7.600e+01, 0.000e+00, 9.200e+01, 2.000e+01],\n", + " [9.800e+01, 2.700e+01, 6.500e+01, 4.700e+01, 9.000e+00, 5.600e+01,\n", + " 2.510e+02, 1.240e+02, 7.300e+01, 1.090e+02],\n", + " [6.200e+01, 2.100e+01, 7.500e+01, 1.930e+02, 5.000e+00, 1.120e+02,\n", + " 1.420e+02, 1.540e+02, 1.990e+02, 1.750e+02],\n", + " [4.600e+01, 4.400e+01, 2.700e+01, 1.030e+02, 1.800e+01, 1.500e+01,\n", + " 2.300e+01, 0.000e+00, 6.000e+00, 5.000e+00],\n", + " [1.110e+02, 1.220e+02, 2.160e+02, 3.150e+02, 3.600e+01, 1.280e+02,\n", + " 4.190e+02, 1.960e+02, 7.200e+01, 1.120e+02],\n", + " [3.370e+02, 9.200e+01, 1.710e+02, 2.730e+02, 4.500e+01, 1.295e+03,\n", + " 2.927e+03, 2.411e+03, 3.100e+03, 1.563e+03],\n", + " [2.630e+02, 8.300e+01, 4.500e+01, 1.890e+02, 1.600e+01, 3.600e+01,\n", + " 2.930e+02, 2.870e+02, 7.700e+01, 9.700e+01],\n", + " [1.300e+01, 5.900e+01, 7.700e+01, 1.940e+02, 1.600e+01, 2.400e+01,\n", + " 1.500e+02, 1.000e+00, 3.100e+01, 1.700e+01],\n", + " [1.590e+02, 7.100e+01, 1.000e+02, 2.680e+02, 2.100e+01, 3.070e+02,\n", + " 8.190e+02, 6.040e+02, 9.160e+02, 3.740e+02],\n", + " [8.300e+01, 4.600e+01, 4.800e+01, 1.260e+02, 6.000e+00, 2.400e+01,\n", + " 1.330e+02, 0.000e+00, 0.000e+00, 2.000e+00],\n", + " [5.500e+01, 1.400e+01, 0.000e+00, 5.600e+01, 7.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 4.000e+00, 1.000e+00],\n", + " [3.700e+01, 4.900e+01, 7.300e+01, 1.400e+02, 2.200e+01, 1.600e+01,\n", + " 3.200e+01, 1.850e+02, 2.500e+01, 2.100e+01],\n", + " [5.500e+01, 3.100e+01, 0.000e+00, 1.600e+02, 1.500e+01, 1.300e+01,\n", + " 3.600e+01, 0.000e+00, 4.000e+00, 2.500e+01],\n", + " [7.000e+00, 1.500e+01, 0.000e+00, 1.500e+01, 2.000e+00, 0.000e+00,\n", + " 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00],\n", + " [9.900e+01, 0.000e+00, 0.000e+00, 2.000e+01, 0.000e+00, 0.000e+00,\n", + " 0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00]])" ] }, - "execution_count": 14, "metadata": {}, - "output_type": 
"execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD5CAYAAADcDXXiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAAsTAAALEwEAmpwYAAAP+klEQVR4nO3dfYzlVX3H8fdHVrQ+sQjrhuwuHRrXtsRGIRPE2LTqtg0PDUtSJJhaV7pxE4vGBtO6tX/08Q9IU6kmhnYDrYvxAUpr2SjVEsCYNoU6CKJArSuF7m6BHRG2tUQt9ds/7lkywMzeOzN3Hvbs+5VM7vmd37lzv3Oy+5kzZ373N6kqJEl9ecFKFyBJGj/DXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ2tGGZRkLXAN8FqggF8HvglcD0wADwEXV9UTSQJ8BDgPeAp4V1V99Uif/+STT66JiYkFfQGSdKy66667vlNV62Y7N1K4MwjrL1TVRUmOB14CfAi4taquSLIT2Al8EDgX2Nw+3gBc3R7nNDExwdTU1IilSJIAkjw817mh2zJJTgB+DrgWoKp+WFVPAluB3W3YbuDC1t4KXFcDdwBrk5yy4OolSfM2yp77acA08FdJ7k5yTZKXAuur6pE25lFgfWtvAPbNeP7+1vcsSXYkmUoyNT09vfCvQJL0PKOE+xrgTODqqjoD+B8GWzDPqME9DOZ1H4Oq2lVVk1U1uW7drFtGkqQFGiXc9wP7q+rOdnwjg7B/7PB2S3s82M4fADbNeP7G1idJWiZDw72qHgX2JfnJ1rUFuB/YA2xrfduAm1p7D/DODJwNHJqxfSNJWgajXi3zPuCT7UqZB4FLGXxjuCHJduBh4OI29mYGl0HuZXAp5KVjrViSNNRI4V5V9wCTs5zaMsvYAi5bXFmSpMXwHaqS1CHDXZI6NOqe+6o1sfPzI4176Irzl7gSSVo9XLlLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6NFK4J3koydeT3JNkqvW9MsktSb7VHk9s/Uny0SR7k9yb5Myl/AIkSc83n5X7W6rq9VU12Y53ArdW1Wbg1nYMcC6wuX3sAK4eV7GSpNEsZltmK7C7tXcDF87ov64G7gDWJjllEa8jSZqnUcO9gH9IcleSHa1vfVU90tqPAutbewOwb8Zz97e+Z0myI8lUkqnp6ekFlC5JmsuaEcf9bFUdSPIq4JYk/zrzZFVVkprPC1fVLmAXwOTk5LyeK0k6spFW7lV1oD0eBD4LnAU8dni7pT0ebMMPAJtmPH1j65MkLZOh4Z7kpUlefrgN/BLwDWAPsK0N2wbc1Np7gHe2q2bOBg7N2L6RJC2DUbZl1gOfTXJ4/Keq6gtJvgLckGQ78DBwcRt/M3AesBd4Crh07FVLko5oaLhX1YPA62bpfxzYMkt/AZeNpTpJ0oL4DlVJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUMjh3uS45LcneRz7fi0JHcm2Zvk+iTHt/4XteO97fzEEtUuSZrDfFbu7wcemHF8JXBVVb0aeALY3vq3A0+0/qvaOEnSMhop3JNsBM4HrmnHAd4K3NiG7AYubO2t7Zh2fksbL0laJqOu3P8M+G3gR+34JODJqnq6He8HNrT2BmAfQDt/qI1/liQ7kkwlmZqenl5Y9ZKkWQ0N9yS/DBysqrvG+cJVtauqJqtqct26de
P81JJ0zFszwpg3ARckOQ94MfAK4CPA2iRr2up8I3CgjT8AbAL2J1kDnAA8PvbKJUlzGrpyr6rfqaqNVTUBXALcVlW/CtwOXNSGbQNuau097Zh2/raqqrFWLUk6osVc5/5B4PIkexnsqV/b+q8FTmr9lwM7F1eiJGm+RtmWeUZVfQn4Ums/CJw1y5jvA28bQ22SpAXyHaqS1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHVoXte5H80mdn5+5LEPXXH+ElYiSUvPlbskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkOEuSR0y3CWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4NDfckL07yL0m+luS+JH/Q+k9LcmeSvUmuT3J8639RO97bzk8s8dcgSXqOUVbuPwDeWlWvA14PnJPkbOBK4KqqejXwBLC9jd8OPNH6r2rjJEnLaGi418D32uEL20cBbwVubP27gQtbe2s7pp3fkiTjKliSNNxIe+5JjktyD3AQuAX4NvBkVT3dhuwHNrT2BmAfQDt/CDhpjDVLkoZYM8qgqvo/4PVJ1gKfBX5qsS+cZAewA+DUU09d7Kcbq4mdnx9p3ENXnL/ElUjSwszrapmqehK4HXgjsDbJ4W8OG4EDrX0A2ATQzp8APD7L59pVVZNVNblu3bqFVS9JmtUoV8usayt2kvwY8IvAAwxC/qI2bBtwU2vvace087dVVY2xZknSEKNsy5wC7E5yHINvBjdU1eeS3A98JskfA3cD17bx1wKfSLIX+C5wyRLULUk6gqHhXlX3AmfM0v8gcNYs/d8H3jaW6iRJC+I7VCWpQ4a7JHXIcJekDhnuktQhw12SOmS4S1KHDHdJ6pDhLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjo00l9i0uz8i02SVitX7pLUIcNdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHfoboMfCerpOXmyl2SOuTKfRVxhS9pXFy5S1KHhoZ7kk1Jbk9yf5L7kry/9b8yyS1JvtUeT2z9SfLRJHuT3JvkzKX+IiRJzzbKyv1p4ANVdTpwNnBZktOBncCtVbUZuLUdA5wLbG4fO4Crx161JOmIhoZ7VT1SVV9t7f8GHgA2AFuB3W3YbuDC1t4KXFcDdwBrk5wy7sIlSXOb1557kgngDOBOYH1VPdJOPQqsb+0NwL4ZT9vf+p77uXYkmUoyNT09Pd+6JUlHMHK4J3kZ8DfAb1bVf808V1UF1HxeuKp2VdVkVU2uW7duPk+VJA0xUrgneSGDYP9kVf1t637s8HZLezzY+g8Am2Y8fWPrkyQtk6HXuScJcC3wQFV9eMapPcA24Ir2eNOM/vcm+QzwBuDQjO0bjYHXw0saZpQ3Mb0J+DXg60nuaX0fYhDqNyTZDjwMXNzO3QycB+wFngIuHWfBkqThhoZ7Vf0jkDlOb5llfAGXLbIuSdIi+A5VSeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1CHDXZI6ZLhLUocMd0nqkH9DVSPfqwa8X410tHDlLkkdMtwlqUOGuyR1yHCXpA4Z7pLUIcNdkjpkuEtShwx3SeqQb2Lq2HzenDTuz+mbnaSV5cpdkjpkuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QOGe6S1KGh4Z7kL5McTPKNGX2vTHJLkm+1xxNbf5J8NMneJPcmOXMpi5ckzW6UlfvHgXOe07cTuLWqNgO3tmOAc4HN7WMHcPV4ypQkzcfQcK+qLwPffU73VmB3a+8GLpzRf10N3AGsTXLKmGqVJI1ooXvu66vqkdZ+FFjf2huAfTPG7W99z5NkR5KpJFPT09MLLEOSNJtF3zisqipJLeB5u4BdAJOTk/N+vlY3bzAmrayFrtwfO7zd0h4Ptv4DwKYZ4za2PknSMlpouO8BtrX2NuCmGf3vbFfNnA0cmrF9I0
laJkO3ZZJ8GngzcHKS/cDvAVcANyTZDjwMXNyG3wycB+wFngIuXYKa1RG3b6SlMTTcq+rtc5zaMsvYAi5bbFGSpMXxLzHpqOAKX5ofbz8gSR0y3CWpQ27LqCtu30gDrtwlqUOu3HVMcoWv3hnu0hGM+k0A/Eag1cVtGUnqkOEuSR0y3CWpQ+65S0c5fzms2Rju0jIzjLUc3JaRpA65cpfGZD6XTUpLzXCXVim/WWgx3JaRpA65cpeOEf4i99hiuEt6Fr8J9MFwl7Qg3ndndXPPXZI6ZLhLUofclpG0arjfPz6Gu6Ql5zX7y89tGUnqkCt3SUcdt2+Gc+UuSR0y3CWpQ0uyLZPkHOAjwHHANVV1xVK8jiQdybh/kXs0bfOMfeWe5DjgY8C5wOnA25OcPu7XkSTNbSlW7mcBe6vqQYAknwG2AvcvwWtJ0rJZiks6l+qngaUI9w3AvhnH+4E3PHdQkh3Ajnb4vSTfXODrnQx8Z4HP7Z1zc2TOz9ycm7mNdW5y5aKe/uNznVixSyGrahewa7GfJ8lUVU2OoaTuODdH5vzMzbmZ29EyN0txtcwBYNOM442tT5K0TJYi3L8CbE5yWpLjgUuAPUvwOpKkOYx9W6aqnk7yXuCLDC6F/Muqum/crzPDord2OubcHJnzMzfnZm5Hxdykqla6BknSmPkOVUnqkOEuSR06asI9yTlJvplkb5Kds5x/UZLr2/k7k0ysQJkrYoS5uTzJ/UnuTXJrkjmvje3NsLmZMe5XklSSVX+J2ziNMj9JLm7/fu5L8qnlrnGljPD/6tQktye5u/3fOm8l6pxTVa36Dwa/mP028BPA8cDXgNOfM+Y3gD9v7UuA61e67lU0N28BXtLa73Funjfu5cCXgTuAyZWuezXND7AZuBs4sR2/aqXrXkVzswt4T2ufDjy00nXP/DhaVu7P3NKgqn4IHL6lwUxbgd2tfSOwJUmWscaVMnRuqur2qnqqHd7B4L0Hx4JR/t0A/BFwJfD95SxuFRhlft4NfKyqngCoqoPLXONKGWVuCnhFa58A/Ocy1jfU0RLus93SYMNcY6rqaeAQcNKyVLeyRpmbmbYDf7+kFa0eQ+cmyZnApqo6Fv8O3Cj/dl4DvCbJPyW5o93x9Vgwytz8PvCOJPuBm4H3LU9po/EvMR1DkrwDmAR+fqVrWQ2SvAD4MPCuFS5lNVvDYGvmzQx+4vtykp+pqidXsqhV4u3Ax6vqT5O8EfhEktdW1Y9WujA4elbuo9zS4JkxSdYw+DHp8WWpbmWNdLuHJL8A/C5wQVX9YJlqW2nD5ublwGuBLyV5CDgb2HMM/VJ1lH87+4E9VfW/VfXvwL8xCPvejTI324EbAKrqn4EXM7ip2KpwtIT7KLc02ANsa+2LgNuq/aajc0PnJskZwF8wCPZjZc8UhsxNVR2qqpOraqKqJhj8PuKCqppamXKX3Sj/r/6OwaqdJCcz2KZ5cBlrXCmjzM1/AFsAkvw0g3CfXtYqj+CoCPe2h374lgYPADdU1X1J/jDJBW3YtcBJSfYClwNzXvbWkxHn5k+AlwF/neSeJMfEvX5GnJtj1ojz80Xg8ST3A7cDv1VV3f9EPOLcfAB4d5KvAZ8G3rWaFpTefkCSOnRUrNwlSfNjuEtShwx3SeqQ4S5JHTLcJalDhrskdchwl6QO/T+egzcBrt+oAwAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" + "execution_count": 16 } ], "source": [ - "plt.hist(window_edit_rate, bins=30)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YkgDKePa8ldu" - }, - "source": [ - "# Allele filtering" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KB1B2sqX8qaA" - }, - "source": [ - "If you have non-edited control data of reporter (e.g., plasmid library), you can filter out the reporter to select the significant base edits per guides. Significance is determined by Fisher's exact test." + "bdata.get_edit_mat_from_uns(target_pos_col = \"target_pos\")" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { + "id": "aJXnxwbb4F3G", "colab": { "base_uri": "https://localhost:8080/" }, - "id": "8tDcYNhB8KGj", - "outputId": "7f0523e8-01b7-4139-a6a2-9fb9850caf01" + "outputId": "286999e5-e2e4-4ec8-a525-b378788df340" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Downloading...\n", - "From: https://drive.google.com/uc?id=153fKJntS-4vlrodC6xlZ9oINQo1XT_i9\n", - "To: /content/bean_count_LDLvar_plasmid.h5ad\n", - "\r 0% 0.00/5.10M [00:00" + ], + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAioAAAGwCAYAAACHJU4LAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAkmElEQVR4nO3deVTU9f7H8dcIMqgXcEUxEcrEMjNL00NqRVq5ZGmdNjXRtBVvJnUq8xZxLbFFj3UrNG9J3RbK0m43U0vLFpdSE7LNfcEElxYWyxGZz++Pfs5pRA3Gge+H5vk4Z85pvnyYedMn83m+8x3GZYwxAgAAsFA9pwcAAAA4FkIFAABYi1ABAADWIlQAAIC1CBUAAGAtQgUAAFiLUAEAANYKd3qAE+H1erVr1y5FRUXJ5XI5PQ4AAKgCY4xKS0vVunVr1at3/HMmdTpUdu3apfj4eKfHAAAAASgoKFCbNm2Ou6ZOh0pUVJSk33/Q6Ohoh6cBAABVUVJSovj4eN/f48dTp0Pl8Ms90dHRhAoAAHVMVS7b4GJaAABgLUIFAABYi1ABAADWIlQAAIC1CBUAAGAtQgUAAFiLUAEAANYiVAAAgLUIFQAAYC1CBQAAWItQAQAA1nI8VH744QcNHz5czZo1U4MGDXTmmWdq9erVTo8FAAAs4OiHEv7888/q2bOnUlJStGDBArVo0UIbN25UkyZNnBwLAABYwtFQefTRRxUfH6/Zs2f7jp188skOTgQAAGzi6Es/77zzjrp166arr75asbGxOvvsszVr1qxjrvd4PCopKfG7AQCAvy5Hz6hs2bJF2dnZSk9P1/33369Vq1bpjjvuUEREhFJTUyutz8rKUmZmpgOT1h2J9813egRYatuUgU6PAADV5jLGGKeePCIiQt26ddPy5ct9x+644w6tWrVKK1asqLTe4/HI4/H47peUlCg+Pl7FxcWKjo6ulZltR6jgWAgVALYoKSlRTExMlf7+dvSln7i4OHXs2NHv2Omnn64dO3Ycdb3b7VZ0dLTfDQAA/HU5Gio9e/bU+vXr/Y5t2LBBCQkJDk0EAABs4miojB8/XitXrtTkyZO1adMmvfrqq3ruueeUlpbm5FgAAMASjobKueeeq3nz5um1115Tp06dNGnSJE2fPl3Dhg1zciwAAGAJR9/1I0mXXXaZLrvsMqfHAAAAFnL8V+gDAAAcC6ECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWo6GykMPPSSXy+V3O+2005wcCQAAWCTc6QHOOOMMLV682Hc/PNzxkQAAgCUcr4Lw8HC1atWqSms9Ho88Ho/vfklJSU2NBQAALOB4qGzcuFGtW7dWZGSkkpOTlZWVpbZt2x51bVZWljIzM2tttsT75tfacwEAgMocvUalR48eysnJ0cKFC5Wdna2tW7eqd+/eKi0tPer6CRMmqLi42HcrKCio5YkBAEBtcvSMSv/+/X3/3LlzZ/Xo0UMJCQl64403NHr06Err3W633G53bY4IAAAcZNXbkxs3bqykpCRt2rTJ6VEAAIAFrAqVsrIybd68WXFxcU6PAgAALOBoqNx99936+OOPtW3bNi1fvlxDhgxRWFiYrr/+eifHAgAAlnD0GpWdO3fq+uuv148//qgWLVqoV69eWrlypVq0aOHkWAAAwBKOhkpubq6
TTw8AACxn1TUqAAAAf0SoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFrWhMqUKVPkcrl05513Oj0KAACwhBWhsmrVKs2cOVOdO3d2ehQAAGARx0OlrKxMw4YN06xZs9SkSROnxwEAABZxPFTS0tI0cOBA9e3b90/XejwelZSU+N0AAMBfV7iTT56bm6svv/xSq1atqtL6rKwsZWZm1vBUABBaEu+b7/QI1bZtykCnR0AtceyMSkFBgcaNG6dXXnlFkZGRVfqeCRMmqLi42HcrKCio4SkBAICTHDujsmbNGu3Zs0fnnHOO71hFRYU++eQTPf300/J4PAoLC/P7HrfbLbfbXdujAgAAhzgWKn369NG6dev8jo0aNUqnnXaa7r333kqRAgAAQo9joRIVFaVOnTr5HWvUqJGaNWtW6TgAAAhNjr/rBwAA4FgcfdfPkZYuXer0CAAAwCKcUQEAANYiVAAAgLUIFQAAYC1CBQAAWItQAQAA1iJUAACAtQgVAABgLUIFAABYi1ABAADWIlQAAIC1CBUAAGCtgEJly5YtwZ4DAACgkoBC5dRTT1VKSopefvllHThwINgzAQAASAowVL788kt17txZ6enpatWqlW655RZ98cUXwZ4NAACEuIBCpUuXLnryySe1a9cuvfDCCyosLFSvXr3UqVMnTZs2TXv37g32nAAAIASd0MW04eHhuvLKKzVnzhw9+uij2rRpk+6++27Fx8drxIgRKiwsDNacAAAgBJ1QqKxevVq333674uLiNG3aNN19993avHmzPvjgA+3atUtXXHFFsOYEAAAhKDyQb5o2bZpmz56t9evXa8CAAXrppZc0YMAA1av3e/ecfPLJysnJUWJiYjBnBQAAISagUMnOztaNN96okSNHKi4u7qhrYmNj9fzzz5/QcAAAILQFFCobN2780zURERFKTU0N5OEBAAAkBXiNyuzZszVnzpxKx+fMmaMXX3zxhIcCAACQAgyVrKwsNW/evNLx2NhYTZ48+YSHAgAAkAIMlR07dujkk0+udDwhIUE7duw44aEAAACkAEMlNjZWX331VaXj+fn5atas2QkPBQAAIAUYKtdff73uuOMOffTRR6qoqFBFRYU+/PBDjRs3Ttddd12wZwQAACEqoHf9TJo0Sdu2bVOfPn0UHv77Q3i9Xo0YMYJrVAAAQNAEFCoRERF6/fXXNWnSJOXn56tBgwY688wzlZCQEOz5AABACAsoVA5LSkpSUlJSsGYBAADwE1CoVFRUKCcnR0uWLNGePXvk9Xr9vv7hhx8GZTgAABDaAgqVcePGKScnRwMHDlSnTp3kcrmCPRcAAEBgoZKbm6s33nhDAwYMCPY8AAAAPgG9PTkiIkKnnnpqsGcBAADwE1Co3HXXXXryySdljAn2PAAAAD4BvfTz2Wef6aOPPtKCBQt0xhlnqH79+n5fnzt3blCGAwAAoS2gUGncuLGGDBkS7FkAAAD8BBQqs2fPDvYcAAAAlQR0jYokHTp0SIsXL9bMmTNVWloqSdq1a5fKysqCNhwAAAhtAZ1R2b59u/r166cdO3bI4/Ho4osvVlRUlB599FF5PB7NmDEj2HMCAIAQFNAZlXHjxqlbt276+eef1aBBA9/xIUOGaMmSJUEbDgAAhLaAzqh8+umnWr58uSIiIvyOJyYm6ocffgjKYAA
AAAGdUfF6vaqoqKh0fOfOnYqKijrhoQAAAKQAQ+WSSy7R9OnTffddLpfKysqUkZHBr9UHAABBE9BLP1OnTtWll16qjh076sCBAxo6dKg2btyo5s2b67XXXgv2jAAAIEQFFCpt2rRRfn6+cnNz9dVXX6msrEyjR4/WsGHD/C6uBQAAOBEBhYokhYeHa/jw4cGcBQAAwE9AofLSSy8d9+sjRowIaBgAAIA/CihUxo0b53e/vLxcv/76qyIiItSwYUNCBQAABEVA7/r5+eef/W5lZWVav369evXqxcW0AAAgaAL+rJ8jtW/fXlOmTKl0tuV4srOz1blzZ0VHRys6OlrJyclasGBBsEYCAAB1XNBCRfr9Attdu3ZVeX2bNm00ZcoUrVmzRqtXr9ZFF12kK664Qt98800wxwIAAHVUQNeovPPOO373jTEqLCzU008/rZ49e1b5cQYNGuR3/5FHHlF2drZWrlypM844I5DRAADAX0hAoTJ48GC/+y6XSy1atNBFF12kqVOnBjRIRUWF5syZo/379ys5Ofmoazwejzwej+9+SUlJQM8FAADqhoBCxev1Bm2AdevWKTk5WQcOHNDf/vY3zZs3Tx07djzq2qysLGVmZgbtuQEAgN2Ceo1KIDp06KC8vDx9/vnnuu2225Samqpvv/32qGsnTJig4uJi362goKCWpwUAALUpoDMq6enpVV47bdq04349IiJCp556qiSpa9euWrVqlZ588knNnDmz0lq32y232129YQEAQJ0VUKisXbtWa9euVXl5uTp06CBJ2rBhg8LCwnTOOef41rlcrmo/ttfr9bsOBQAAhK6AQmXQoEGKiorSiy++qCZNmkj6/ZfAjRo1Sr1799Zdd91VpceZMGGC+vfvr7Zt26q0tFSvvvqqli5dqkWLFgUyFgAA+IsJKFSmTp2q999/3xcpktSkSRM9/PDDuuSSS6ocKnv27NGIESNUWFiomJgYde7cWYsWLdLFF18cyFgAAOAvJqBQKSkp0d69eysd37t3r0pLS6v8OM8//3wgTw8AAEJEQO/6GTJkiEaNGqW5c+dq586d2rlzp9566y2NHj1aV155ZbBnBAAAISqgMyozZszQ3XffraFDh6q8vPz3BwoP1+jRo/X4448HdUAAABC6AgqVhg0b6tlnn9Xjjz+uzZs3S5LatWunRo0aBXU4AAAQ2k7oF74VFhaqsLBQ7du3V6NGjWSMCdZcAAAAgYXKjz/+qD59+igpKUkDBgxQYWGhJGn06NFVfscPAADAnwkoVMaPH6/69etrx44datiwoe/4tddeq4ULFwZtOAAAENoCukbl/fff16JFi9SmTRu/4+3bt9f27duDMhgAAEBAZ1T279/vdyblsJ9++onP4gEAAEETUKj07t1bL730ku++y+WS1+vVY489ppSUlKANBwAAQltAL/089thj6tOnj1avXq2DBw/qnnvu0TfffKOffvpJy5YtC/aMAAAgRAV0RqVTp07asGGDevXqpSuuuEL79+/XlVdeqbVr16pdu3bBnhEAAISoap9RKS8vV79+/TRjxgxNnDixJmYCAACQFMAZlfr16+urr76qiVkAAAD8BPTSz/Dhw/nkYwAAUOMCupj20KFDeuGFF7R48WJ17dq10mf8TJs2LSjDAQCA0FatUNmyZYsSExP19ddf65xzzpEkbdiwwW+Ny+UK3nQAACCkVStU2rdvr8LCQn300UeSfv+V+U899ZRatmxZI8MBAIDQVq1rVI78dOQFCxZo//79QR0IAADgsIAupj3syHABAAAIpmqFisvlqnQNCtekAACAmlKta1SMMRo5cqTvgwcPHDigW2+9tdK7fubOnRu8CQEAQMiqVqikpqb63R8+fHhQhwEAAPijaoXK7Nmza2oOAACASk7oYloAAICaRKgAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAAC
wFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACs5WioZGVl6dxzz1VUVJRiY2M1ePBgrV+/3smRAACARRwNlY8//lhpaWlauXKlPvjgA5WXl+uSSy7R/v37nRwLAABYItzJJ1+4cKHf/ZycHMXGxmrNmjU6//zzK633eDzyeDy++yUlJTU+IwAAcI6joXKk4uJiSVLTpk2P+vWsrCxlZmbW5kgAAARN4n3znR6h2rZNGejo81tzMa3X69Wdd96pnj17qlOnTkddM2HCBBUXF/tuBQUFtTwlAACoTdacUUlLS9PXX3+tzz777Jhr3G633G53LU4FAACcZEWojB07Vu+++64++eQTtWnTxulxAACAJRwNFWOM/v73v2vevHlaunSpTj75ZCfHAQAAlnE0VNLS0vTqq6/qv//9r6KiolRUVCRJiomJUYMGDZwcDQAAWMDRi2mzs7NVXFysCy+8UHFxcb7b66+/7uRYAADAEo6/9AMAAHAs1rw9GQAA4EiECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKzlaKh88sknGjRokFq3bi2Xy6W3337byXEAAIBlHA2V/fv366yzztIzzzzj5BgAAMBS4U4+ef/+/dW/f38nRwAAABZzNFSqy+PxyOPx+O6XlJQ4OA0AAKhpdSpUsrKylJmZ6fQYQJ2UeN98p0eotm1TBjo9AgCH1al3/UyYMEHFxcW+W0FBgdMjAQCAGlSnzqi43W653W6nxwAAALWkTp1RAQAAocXRMyplZWXatGmT7/7WrVuVl5enpk2bqm3btg5OBgAAbOBoqKxevVopKSm+++np6ZKk1NRU5eTkODQVAACwhaOhcuGFF8oY4+QIAADAYlyjAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAaxEqAADAWoQKAACwFqECAACsRagAAABrESoAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsJYVofLMM88oMTFRkZGR6tGjh7744gunRwIAABZwPFRef/11paenKyMjQ19++aXOOussXXrppdqzZ4/TowEAAIc5HirTpk3TTTfdpFGjRqljx46aMWOGGjZsqBdeeMHp0QAAgMPCnXzygwcPas2aNZowYYLvWL169dS3b1+tWLGi0nqPxyOPx+O7X1xcLEkqKSmpkfm8nl9r5HEBVE1N/dmGv7r4/7q6+t8G/679H9MY86drHQ2Vffv2qaKiQi1btvQ73rJlS33//feV1mdlZSkzM7PS8fj4+BqbEYBzYqY7PQFsxX8btacm/12XlpYqJibmuGscDZXqmjBhgtLT0333vV6vfvrpJzVr1kwulyuoz1VSUqL4+HgVFBQoOjo6qI+
N4GO/6h72rG5hv+oem/fMGKPS0lK1bt36T9c6GirNmzdXWFiYdu/e7Xd89+7datWqVaX1brdbbrfb71jjxo1rckRFR0dbt8E4Nvar7mHP6hb2q+6xdc/+7EzKYY5eTBsREaGuXbtqyZIlvmNer1dLlixRcnKyg5MBAAAbOP7ST3p6ulJTU9WtWzd1795d06dP1/79+zVq1CinRwMAAA5zPFSuvfZa7d27Vw8++KCKiorUpUsXLVy4sNIFtrXN7XYrIyOj0ktNsBP7VfewZ3UL+1X3/FX2zGWq8t4gAAAABzj+C98AAACOhVABAADWIlQAAIC1CBUAAGCtkA6VZ555RomJiYqMjFSPHj30xRdfHHf9nDlzdNpppykyMlJnnnmm3nvvvVqaFFL19mvWrFnq3bu3mjRpoiZNmqhv375/ur8Ivur+GTssNzdXLpdLgwcPrtkB4ae6+/XLL78oLS1NcXFxcrvdSkpK4v+Ltay6ezZ9+nR16NBBDRo0UHx8vMaPH68DBw7U0rQBMiEqNzfXREREmBdeeMF888035qabbjKNGzc2u3fvPur6ZcuWmbCwMPPYY4+Zb7/91vzjH/8w9evXN+vWravlyUNTdfdr6NCh5plnnjFr16413333nRk5cqSJiYkxO3furOXJQ1d19+ywrVu3mpNOOsn07t3bXHHFFbUzLKq9Xx6Px3Tr1s0MGDDAfPbZZ2br1q1m6dKlJi8vr5YnD13V3bNXXnnFuN1u88orr5itW7eaRYsWmbi4ODN+/Phanrx6QjZUunfvbtLS0nz3KyoqTOvWrU1WVtZR119zzTVm4MCBfsd69OhhbrnllhqdE7+r7n4d6dChQyYqKsq8+OKLNTUijhDInh06dMicd9555t///rdJTU0lVGpRdfcrOzvbnHLKKebgwYO1NSKOUN09S0tLMxdddJHfsfT0dNOzZ88anfNEheRLPwcPHtSaNWvUt29f37F69eqpb9++WrFixVG/Z8WKFX7rJenSSy895noETyD7daRff/1V5eXlatq0aU2NiT8IdM/++c9/KjY2VqNHj66NMfH/Atmvd955R8nJyUpLS1PLli3VqVMnTZ48WRUVFbU1dkgLZM/OO+88rVmzxvfy0JYtW/Tee+9pwIABtTJzoBz/zbRO2LdvnyoqKir99tuWLVvq+++/P+r3FBUVHXV9UVFRjc2J3wWyX0e699571bp160qxiZoRyJ599tlnev7555WXl1cLE+KPAtmvLVu26MMPP9SwYcP03nvvadOmTbr99ttVXl6ujIyM2hg7pAWyZ0OHDtW+ffvUq1cvGWN06NAh3Xrrrbr//vtrY+SAheQZFYSWKVOmKDc3V/PmzVNkZKTT4+AoSktLdcMNN2jWrFlq3ry50+OgCrxer2JjY/Xcc8+pa9euuvbaazVx4kTNmDHD6dFwDEuXLtXkyZP17LPP6ssvv9TcuXM1f/58TZo0yenRjiskz6g0b95cYWFh2r17t9/x3bt3q1WrVkf9nlatWlVrPYInkP067IknntCUKVO0ePFide7cuSbHxB9Ud882b96sbdu2adCgQb5jXq9XkhQeHq7169erXbt2NTt0CAvkz1hcXJzq16+vsLAw37HTTz9dRUVFOnjwoCIiImp05lAXyJ498MADuuGGGzRmzBhJ0plnnqn9+/fr5ptv1sSJE1Wvnp3nLuycqoZFRESoa9euWrJkie+Y1+vVkiVLlJycfNTvSU5O9lsvSR988MEx1yN4AtkvSXrsscc0adIkLVy4UN26dauNUfH/qrtnp512mtatW6e8vDzf7fLLL1dKSory8vIUHx9fm+OHnED+jPXs2VObNm3yBaUkbdiwQXFxcURKLQhkz3799ddKMXI4NI3NH/vn9NW8TsnNzTVut9vk5OSYb7/91tx8882mcePGpqioyBhjzA033GDuu+8+3/ply5aZ8PBw88QTT5jvvvvOZGRk8PbkWlTd/ZoyZYqJiIgwb775pik
sLPTdSktLnfoRQk519+xIvOundlV3v3bs2GGioqLM2LFjzfr16827775rYmNjzcMPP+zUjxByqrtnGRkZJioqyrz22mtmy5Yt5v333zft2rUz11xzjVM/QpWEbKgYY8y//vUv07ZtWxMREWG6d+9uVq5c6fvaBRdcYFJTU/3Wv/HGGyYpKclERESYM844w8yfP7+WJw5t1dmvhIQEI6nSLSMjo/YHD2HV/TP2R4RK7avufi1fvtz06NHDuN1uc8opp5hHHnnEHDp0qJanDm3V2bPy8nLz0EMPmXbt2pnIyEgTHx9vbr/9dvPzzz/X/uDV4DLG5vM9AAAglIXkNSoAAKBuIFQAAIC1CBUAAGAtQgUAAFiLUAEAANYiVAAAgLUIFQAAYC1CBQAAWItQAVAlLpdLb7/9tiRp27ZtcrlcysvLO+73XHjhhbrzzjtrfDYAf12EChBiRo4cKZfLVenWr1+/Kj9GfHy8CgsL1alTJ0m/f3y8y+XSL7/84rdu7ty5Vn+E/EMPPaQuXbo4PQaA4wh3egAAta9fv36aPXu23zG3213l7w8LCzvmR8n/UdOmTas9WzAcPHiQT/AF/iI4owKEILfbrVatWvndmjRp4vv6xo0bdf755ysyMlIdO3bUBx984Pf9f3zpZ9u2bUpJSZEkNWnSRC6XSyNHjpRU+aWfxMRETZ48WTfeeKOioqLUtm1bPffcc36PvXz5cnXp0kWRkZHq1q2b3n777T99mSkxMVGTJk3SiBEjFB0drZtvvlmSdO+99yopKUkNGzbUKaecogceeEDl5eWSpJycHGVmZio/P993ViknJ0eS9Msvv2jMmDFq0aKFoqOjddFFFyk/P9/3fPn5+UpJSVFUVJSio6PVtWtXrV69ulp7AKBqOKMCwI/X69WVV16pli1b6vPPP1dxcfFxrzOJj4/XW2+9pauuukrr169XdHS0GjRocMz1U6dO1aRJk3T//ffrzTff1G233aYLLrhAHTp0UElJiQYNGqQBAwbo1Vdf1fbt26t8jcsTTzyhBx98UBkZGb5jUVFRysnJUevWrbVu3TrddNNNioqK0j333KNrr71WX3/9tRYuXKjFixdLkmJiYiRJV199tRo0aKAFCxYoJiZGM2fOVJ8+fbRhwwY1bdpUw4YN09lnn63s7GyFhYUpLy9P9evXr9KcAKrJ6Y9vBlC7UlNTTVhYmGnUqJHf7ZFHHjHGGLNo0SITHh5ufvjhB9/3LFiwwEgy8+bNM8YYs3XrViPJrF271hhjzEcffWQkVfq4+AsuuMCMGzfOdz8hIcEMHz7cd9/r9ZrY2FiTnZ1tjDEmOzvbNGvWzPz222++NbNmzfJ7rqNJSEgwgwcP/tOf/fHHHzddu3b13c/IyDBnnXWW35pPP/3UREdHmwMHDvgdb9eunZk5c6YxxpioqCiTk5Pzp88H4MRxRgUIQSkpKcrOzvY7dvh6ku+++07x8fFq3bq172vJyclBe+7OnTv7/tnlcqlVq1bas2ePJGn9+vXq3LmzIiMjfWu6d+9epcft1q1bpWOvv/66nnrqKW3evFllZWU6dOiQoqOjj/s4+fn5KisrU7NmzfyO//bbb9q8ebMkKT09XWPGjNF//vMf9e3bV1dffbXatWtXpTkBVA+hAoSgRo0a6dRTT3XkuY98icTlcsnr9Z7w4zZq1Mjv/ooVKzRs2DBlZmbq0ksvVUxMjHJzczV16tTjPk5ZWZni4uK0dOnSSl9r3LixpN/fLTR06FDNnz9fCxYsUEZGhnJzczVkyJAT/jkA+CNUAPg5/fTTVVBQoMLCQsXFxUmSVq5cedzvOfwOm4qKihN67g4dOujll1+Wx+PxvQtp1apVAT3W8uXLlZCQoIkTJ/qObd++3W9NREREpZnPOeccFRUVKTw8XImJicd8/KSkJCUlJWn8+PG6/vrrNXv2bEIFqAG86wcIQR6PR0VFRX63ffv2SZL69u2rpKQkpaamKj8/X59++qnfX/Z
Hk5CQIJfLpXfffVd79+5VWVlZQHMNHTpUXq9XN998s7777jstWrRITzzxhKTfz7xUR/v27bVjxw7l5uZq8+bNeuqppzRv3jy/NYmJidq6davy8vK0b98+eTwe9e3bV8nJyRo8eLDef/99bdu2TcuXL9fEiRO1evVq/fbbbxo7dqyWLl2q7du3a9myZVq1apVOP/30gH5mAMdHqAAhaOHChYqLi/O79erVS5JUr149zZs3T7/99pu6d++uMWPG6JFHHjnu45100knKzMzUfffdp5YtW2rs2LEBzRUdHa3//e9/ysvLU5cuXTRx4kQ9+OCDkuR33UpVXH755Ro/frzGjh2rLl26aPny5XrggQf81lx11VXq16+fUlJS1KJFC7322mtyuVx67733dP7552vUqFFKSkrSddddp+3bt6tly5YKCwvTjz/+qBEjRigpKUnXXHON+vfvr8zMzIB+ZgDH5zLGGKeHAIBjeeWVVzRq1CgVFxcf923PAP6auEYFgFVeeuklnXLKKTrppJOUn5+ve++9V9dccw2RAoQoQgWAVYqKivTggw+qqKhIcXFxuvrqq//0pScAf1289AMAAKzFxbQAAMBahAoAALAWoQIAAKxFqAAAAGsRKgAAwFqECgAAsBahAgAArEWoAAAAa/0f7B3jBJk7/cgAAAAASUVORK5CYII=\n" }, - "metadata": {}, - "output_type": "display_data" + "metadata": {} } ], "source": [ - "q_val_sig, bdata.uns['sig_allele_counts'] = be.filter_alleles(\n", - " bdata, bdata_plasmid, filter_each_sample = True, run_parallel = True, allele_counts_key=\"allele_counts\", map_to_filtered = True)" + "plt.hist(window_edit_rate)\n", + "plt.xlabel(\"Editing rates\")\n", + "plt.ylabel(\"Frequency\")\n", + "plt.show()" ] } ], "metadata": { "colab": { - "collapsed_sections": [], - "provenance": [] + "provenance": [], + "include_colab_link": true }, "kernelspec": { "display_name": "Python [conda env:.conda-jy_anbe]", @@ -1319,354 +3279,8 @@ "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.11" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "1f08cf51bc9d4f5c96a2b2226e525553": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": 
"IPY_MODEL_d939649975ce480aa7d6ae9affd5b004", - "placeholder": "​", - "style": "IPY_MODEL_242b5df64e8a44a88076efd8f8cf8f57", - "value": " 607/3446 [00:48<02:25, 19.56it/s]" - } - }, - "22bea1b275c54009ac53ac367a18596a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "2420e49e9ce740828cd6c0383ccde9a7": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - 
"grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "242b5df64e8a44a88076efd8f8cf8f57": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "46805ab8d60448d3968bd07e5882f40f": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "FloatProgressModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "FloatProgressModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "ProgressView", - "bar_style": "", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_884527b49d144508a932dc3e61c6530a", - "max": 3446, - "min": 0, - "orientation": "horizontal", - "style": "IPY_MODEL_57ce91c75531417cb2447ef9d3d5ed2d", - "value": 607 - } - }, - "494439c2d852487da85750b5a52a196d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - 
"model_name": "HBoxModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HBoxModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HBoxView", - "box_style": "", - "children": [ - "IPY_MODEL_eb3cccd21f1c47efa34fcbe8a42dae73", - "IPY_MODEL_46805ab8d60448d3968bd07e5882f40f", - "IPY_MODEL_1f08cf51bc9d4f5c96a2b2226e525553" - ], - "layout": "IPY_MODEL_2420e49e9ce740828cd6c0383ccde9a7" - } - }, - "57ce91c75531417cb2447ef9d3d5ed2d": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "ProgressStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "ProgressStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "bar_color": null, - "description_width": "" - } - }, - "62578c83bcf948b98878b0e8a4e044bb": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "DescriptionStyleModel", - "state": { - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "DescriptionStyleModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "StyleView", - "description_width": "" - } - }, - "884527b49d144508a932dc3e61c6530a": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, 
- "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - "d939649975ce480aa7d6ae9affd5b004": { - "model_module": "@jupyter-widgets/base", - "model_module_version": "1.2.0", - "model_name": "LayoutModel", - "state": { - "_model_module": "@jupyter-widgets/base", - "_model_module_version": "1.2.0", - "_model_name": "LayoutModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/base", - "_view_module_version": "1.2.0", - "_view_name": "LayoutView", - "align_content": null, - "align_items": null, - "align_self": null, - "border": null, - "bottom": null, - "display": null, - "flex": null, - "flex_flow": null, - "grid_area": null, - "grid_auto_columns": null, - "grid_auto_flow": null, - "grid_auto_rows": null, - "grid_column": null, - "grid_gap": null, - "grid_row": null, - "grid_template_areas": null, - "grid_template_columns": null, - "grid_template_rows": null, - "height": null, - "justify_content": null, - "justify_items": null, - "left": null, - "margin": null, - "max_height": null, - "max_width": null, - "min_height": null, - "min_width": null, - "object_fit": null, - "object_position": null, - "order": null, - "overflow": null, - "overflow_x": null, - "overflow_y": null, - "padding": null, - "right": null, - "top": null, - "visibility": null, - "width": null - } - }, - 
"eb3cccd21f1c47efa34fcbe8a42dae73": { - "model_module": "@jupyter-widgets/controls", - "model_module_version": "1.5.0", - "model_name": "HTMLModel", - "state": { - "_dom_classes": [], - "_model_module": "@jupyter-widgets/controls", - "_model_module_version": "1.5.0", - "_model_name": "HTMLModel", - "_view_count": null, - "_view_module": "@jupyter-widgets/controls", - "_view_module_version": "1.5.0", - "_view_name": "HTMLView", - "description": "", - "description_tooltip": null, - "layout": "IPY_MODEL_22bea1b275c54009ac53ac367a18596a", - "placeholder": "​", - "style": "IPY_MODEL_62578c83bcf948b98878b0e8a4e044bb", - "value": "Mapping alleles to closest filtered alleles: 18%" - } - } - } } }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file