Skip to content

Commit

Permalink
debug errors in profile due to dual editors
Browse files Browse the repository at this point in the history
  • Loading branch information
jykr committed May 6, 2024
1 parent 4e28ba5 commit 714d4f2
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 258 deletions.
2 changes: 1 addition & 1 deletion bean/cli/execute.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import argparse
from bean.mapping.utils import get_input_parser_count as get_count_parser
from bean.mapping.utils import get_input_parser as get_count_samples_parser
from bean.plotting.utils import parse_args as get_profile_parser
from bean.plotting.parser import parse_args as get_profile_parser
from bean.qc.parser import parse_args as get_qc_parser
from bean.annotate.utils import parse_args as get_filter_parser
from bean.model.parser import parse_args as get_run_parser
Expand Down
2 changes: 1 addition & 1 deletion bean/cli/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os
import papermill as pm
import bean as be
from bean.plotting.utils import parse_args, check_args
from bean.plotting.utils import check_args


def main(args):
Expand Down
163 changes: 17 additions & 146 deletions bean/notebooks/profile_editing_preference.ipynb

Large diffs are not rendered by default.

170 changes: 119 additions & 51 deletions bean/plotting/editing_patterns.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from copy import deepcopy
from typing import Optional, Sequence, Dict, List, Union
from typing import Optional, Sequence, Dict, List, Union, Tuple
import re
from ..framework.Edit import Edit
from bean import ReporterScreen
from bean.framework.Edit import Edit
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import logomaker
from tqdm.auto import tqdm

BASES = ["A", "T", "C", "G"]
Expand Down Expand Up @@ -330,14 +332,16 @@ def plot_by_pos_behive(
return axes, df


def get_position_by_pam_rates(bdata, edit_rates_df: pd.DataFrame, pam_col="5-nt PAM"):
def get_position_by_pam_rates(
bdata, edit_rates_df: pd.DataFrame, target_base_change: str, pam_col="5-nt PAM"
):
edit_rates_df["pam"] = bdata.guides.loc[edit_rates_df.guide, pam_col].reset_index(
drop=True
)
edit_rates_df["pam23"] = edit_rates_df.pam.map(lambda s: s[1:3])
return pd.pivot(
edit_rates_df.loc[
(edit_rates_df.base_change == bdata.target_base_change),
(edit_rates_df.base_change == target_base_change),
["rep_mean", "pam23", "spacer_pos"],
]
.groupby(["pam23", "spacer_pos"])
Expand All @@ -350,53 +354,42 @@ def get_position_by_pam_rates(bdata, edit_rates_df: pd.DataFrame, pam_col="5-nt


def plot_by_pos_pam(
bdata,
edit_rates_df,
pam_col="5-nt PAM",
ax=None,
figsize=(6, 4),
):
"""Plot position by PAM"""
pos_by_pam = get_position_by_pam_rates(bdata, edit_rates_df, pam_col)
if ax is None:
fig, ax = plt.subplots(figsize=figsize)
sns.heatmap(pos_by_pam, ax=ax, cmap="Blues")
ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
return pos_by_pam


def plot_by_pos_pam_and_context(bdata, edit_rates_df, figsize=(6, 6)):
pos_by_pam = get_position_by_pam_rates(bdata, edit_rates_df)
fig, ax = plt.subplots(2, 1, figsize=figsize)
sns.scatterplot(
edit_rates_df.loc[(edit_rates_df.base_change == bdata.target_base_change)],
x="spacer_pos_ctxt",
y="rep_mean",
hue="context",
alpha=0.3,
ax=ax[0],
s=5,
rasterized=True,
)
ax[0].legend(bbox_to_anchor=(1.02, 0.5), loc="center left", title="Context")
ax[0].set_xlabel("Protospacer position")
ax[0].set_xticks(list(range(1, 21)))
ax[0].set_xticklabels(list(range(1, 21)))
ax[0].set_ylabel(f"{bdata.target_base_change} editing rate")
ax[0].set_ylim((0, 1))
ax[0].set_xlim((0.5, 20.5))

sns.heatmap(pos_by_pam, ax=ax[1], cmap="Blues", cbar=False)
ax[1].set_yticklabels(ax[1].get_yticklabels(), rotation=0)
cbax = fig.add_axes(
[0.85, 0.15, 0.02, 0.3],
)
ax[1].set_ylabel("PAM")
ax[1].set_yticklabels([f"N{t.get_text()}" for t in ax[1].get_yticklabels()])
ax[1].set_xlabel("Protospacer position")
fig.colorbar(ax[1].get_children()[0], cbax, label="editing rate")
plt.tight_layout()
return ax
bdata: ReporterScreen,
edit_rates_df: pd.DataFrame,
target_base_changes: List[str],
pam_col: str = "5-nt PAM",
axes=None,
figsize: Tuple[float, float] = (6, 4),
save_fig: bool = False,
save_path: Optional[str] = None,
) -> pd.DataFrame:
"""Plot editing efficiency for each position x PAM combination."""
if axes is None:
fig, axes = plt.subplots(
1,
len(target_base_changes),
figsize=(figsize[0] * len(target_base_changes), figsize[1]),
)
if len(target_base_changes) == 1:
axes = np.array([axes])
pos_by_pam_basechanges = []
for i, target_base_change in enumerate(target_base_changes):
edit_rates_df_base = edit_rates_df.loc[
edit_rates_df.base_change == target_base_change, :
].reset_index(drop=True)
pos_by_pam = get_position_by_pam_rates(
bdata, edit_rates_df_base, target_base_change, pam_col
)
sns.heatmap(pos_by_pam, ax=axes[i], cmap="Blues")
axes[i].set_yticklabels(axes[i].get_yticklabels(), rotation=0)
axes[i].set_title(target_base_change)
pos_by_pam["base_change"] = target_base_change
pos_by_pam_basechanges.append(pos_by_pam)
if save_fig:
if save_path is None:
save_path = "bean_profile_pos_by_pam.pdf"
plt.savefig(save_path, bbox_inches="tight")
return pd.concat(pos_by_pam_basechanges, axis=0)


def get_pam_preference(
Expand Down Expand Up @@ -461,3 +454,78 @@ def plot_pam_preference(
ax.set(xlabel=None, ylabel=None)
ax.set_title("PAM preference")
return ax


def plot_context_specificity(
bdata: ReporterScreen,
edit_rates_df: pd.DataFrame,
target_base_changes: List[str],
window: Tuple[int, int],
axes=None,
figsize: Tuple[float, float] = (3, 5),
save_fig: bool = False,
save_path: Optional[str] = None,
) -> pd.DataFrame:
window_start, window_end = window
if axes is None:
fig, axes = plt.subplots(
1,
len(target_base_changes),
figsize=(figsize[0] * len(target_base_changes), figsize[1]),
)
if len(target_base_changes) == 1:
axes = np.array([axes])
save_tbls = []
for j, target_base_change in enumerate(target_base_changes):
edit_rates_df_base = edit_rates_df.loc[
edit_rates_df.base_change == target_base_change, :
].reset_index(drop=True)
edit_rates_df_base_window = edit_rates_df_base.loc[
(edit_rates_df_base.spacer_pos >= window_start)
& (edit_rates_df_base.spacer_pos <= window_end)
].copy()
edit_rates_df_base_window["context"] = edit_rates_df_base_window.apply(
lambda row: bdata.guides.loc[row.guide, "reporter"][
row.rel_pos - 1 : row.rel_pos + 2
],
axis=1,
)
context_mean_edit_rate = {}
bases = ["A", "C", "G", "T"]

for i in range(3):
edit_rates_df_base_window[f"context_{i}"] = (
edit_rates_df_base_window.context.map(lambda s: s[i])
)
context_mean_edit_rate[i] = edit_rates_df_base_window.groupby(
f"context_{i}"
)["rep_mean"].mean()
context_mean_edit_rate[i] = (
context_mean_edit_rate[i].reindex(bases).fillna(0)
)

target_df = pd.DataFrame(
[int(b == target_base_change[0]) for b in bases], index=bases
)
ic_tbl = pd.concat(
[
context_mean_edit_rate[0] / context_mean_edit_rate[0].sum(),
target_df,
context_mean_edit_rate[2] / context_mean_edit_rate[2].sum(),
],
axis=1,
).T
save_tbl = ic_tbl.transpose()
save_tbl["base_change"] = target_base_change
save_tbls.append(save_tbl)
ic_tbl.index = [-1, 0, 1]
logomaker.Logo(ic_tbl.astype(float), ax=axes[j])
axes[j].set_ylabel("Relative frequency")
axes[j].set_title(target_base_change)
if save_fig:
if save_path is None:
save_path = (
f"bean_profile_context_preference_{window_start}_{window_end}.pdf"
)
fig.savefig(save_path, bbox_inches="tight")
return pd.concat(save_tbls, axis=0)
67 changes: 12 additions & 55 deletions bean/plotting/utils.py
Original file line number Diff line number Diff line change
@@ -1,58 +1,6 @@
import argparse
import os


def parse_args(parser=None):
if parser is None:
parser = argparse.ArgumentParser()
parser.add_argument(
"bdata_path", help="Path to the ReporterScreen object to run QC on", type=str
)
parser.add_argument(
"-o",
"--output-prefix",
help="Output prefix of editing pattern report (prefix.html, prefix.ipynb). If not provided, base name of `bdata_path` is used.",
type=str,
)
parser.add_argument(
"--replicate-col",
help="Column name in `bdata.samples` that describes replicate ID.",
type=str,
default="replicate",
)
parser.add_argument(
"--condition-col",
help="Column name in `bdata.samples` that describes experimental condition. (sorting bin, time, etc.)",
type=str,
default="condition",
)
parser.add_argument(
"--pam-col",
help="Column name describing PAM of each gRNA in `bdata.guides`.",
type=str,
default=None,
)
parser.add_argument(
"--control-condition",
help="Control condition where editing preference would be profiled at. Pre-filters data where `bdata.samples[condition_col] == control_condition`.",
type=str,
default="bulk",
)
parser.add_argument(
"-w",
"--window-length",
help="Window length of editing window of maximal editing efficiency to be identified. This window is used to quantify context specificity within the window.",
type=int,
default=6,
)
parser.add_argument(
"--save-fig",
action="store_true",
help="Save .pdf of the figures included in the report.",
)

return parser

import bean as be

def check_args(args):
if args.output_prefix is None:
Expand All @@ -63,6 +11,15 @@ def check_args(args):
os.makedirs(args.output_prefix, exist_ok=True)
if args.window_length < 1:
raise ValueError(f"window_length {args.window_length} is too small.")
if args.window_length > 20:
raise ValueError(f"window_length {args.window_length} is too large.")
cdata = be.read_h5ad(args.bdata_path)
cdata.samples["replicate"] = cdata.samples[args.replicate_col]
cdata_bulk = cdata[:,cdata.samples[args.condition_col] == args.control_condition]
if len(cdata_bulk) == 0:
raise ValueError(
f"No samples with bdata.samples['{args.condition_col}'] == {args.control_condition}. Please check your input arguments --condition-col & --control-condition."
)
if args.pam_col is not None and args.pam_col not in cdata.guides.columns:
raise ValueError(
f"Specified --pam-col `{args.pam_col}` does not exist in ReporterScreen.guides.columns ({cdata.guides.columns}). Please check your input. If you don't want PAM output, please do not provide --pam-col argument.s"
)
return args
2 changes: 1 addition & 1 deletion docs/profile.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@
Full parameters
==================
.. argparse::
:filename: ../bean/plotting/utils.py
:filename: ../bean/plotting/parser.py
:func: parse_args
:prog: bean profile
Binary file modified tests/data/var_mini_screen.h5ad
Binary file not shown.
7 changes: 4 additions & 3 deletions tests/test_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@


def test_profile_screen():
cmd = "bean profile tests/data/var_mini_screen.h5ad "
cmd = "bean profile tests/data/var_mini_screen.h5ad --pam-col '5-nt PAM'"
try:
subprocess.check_output(
cmd,
Expand All @@ -13,13 +13,14 @@ def test_profile_screen():
except subprocess.CalledProcessError as exc:
raise exc


def test_profile_screen_dualeditor():
cmd = "bean profile tests/data/var_mini_screen_dual.h5ad "
cmd = "bean profile tests/data/var_mini_screen_dual.h5ad --pam-col '5-nt PAM'"
try:
subprocess.check_output(
cmd,
shell=True,
universal_newlines=True,
)
except subprocess.CalledProcessError as exc:
raise exc
raise exc

0 comments on commit 714d4f2

Please sign in to comment.