Skip to content

Commit

Permalink
filter out 0 count gRNAs during run prep & update no edit run docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jykr committed May 6, 2024
1 parent 6b3d8f4 commit 58f1f08
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 9 deletions.
17 changes: 17 additions & 0 deletions bean/annotate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,18 @@ def parse_args(parser=None):
default=None,
help="Plasmid ReporterScreen object path. If provided, alleles are filtered based on if a nucleotide edit is more significantly enriched in sample compared to the plasmid data. Negative control data where no edit is expected can be fed in instead of plasmid library.",
)
parser.add_argument(
"--reporter-length",
type=int,
default=32,
help="Length of reporter sequence in the construct.",
)
parser.add_argument(
"--reporter-right-flank-length",
type=int,
default=6,
help="Length of the right-flanking nucleotides of protospacer in the reporter.",
)
parser.add_argument(
"--edit-start-pos",
"-s",
Expand All @@ -315,6 +327,11 @@ def parse_args(parser=None):
help="Jaccard Index threshold when the alleles are mapped to the most similar alleles. In each filtering step, allele counts of filtered out alleles will be mapped to the most similar allele only if they have Jaccard Index of shared edit higher than this threshold.",
default=0.3,
)
parser.add_argument(
"--filter-spacer",
help="Only consider edit within protospacer positions of reporter.",
action="store_true",
)
parser.add_argument(
"--filter-window",
"-w",
Expand Down
4 changes: 3 additions & 1 deletion bean/cli/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def main(args):
info(f"Filtered down to {len(bdata.uns['sig_allele_counts'])} alleles.")

print(len(bdata.uns[allele_df_keys[-1]]))
if len(bdata.uns[allele_df_keys[-1]]) >= 1:
if len(bdata.uns[allele_df_keys[-1]]) >= 1 and args.filter_spacer:
info("Filtering out edits outside spacer position...")
bdata.uns[f"{allele_df_keys[-1]}_spacer"] = (
bdata.filter_allele_counts_by_pos(
Expand All @@ -77,6 +77,8 @@ def main(args):
map_to_filtered=True,
allele_uns_key=allele_df_keys[-1],
jaccard_threshold=0.2,
reporter_length=args.reporter_length,
reporter_right_flank_length=args.reporter_right_flank_length,
).reset_index(drop=True)
)
info(
Expand Down
13 changes: 11 additions & 2 deletions bean/framework/ReporterScreen.py
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,9 @@ def get_normalized_allele_counts(self, allele_count_df=None, norm_thres=10):
norm_counts = self._get_allele_norm(
allele_count_df=allele_count_df, thres=norm_thres
)
alleles_norm.loc[:, count_columns] = alleles_norm.loc[:, count_columns].astype(float)
alleles_norm.loc[:, count_columns] = alleles_norm.loc[:, count_columns].astype(
float
)
alleles_norm.loc[:, count_columns] = (
alleles_norm.loc[:, count_columns] / norm_counts
)
Expand All @@ -637,6 +639,8 @@ def filter_allele_counts_by_pos(
map_to_filtered: bool = True,
distribute: bool = False,
jaccard_threshold: float = 0.1,
reporter_length: int = 32,
reporter_right_flank_length: int = 6,
):
"""
Filter alleles based on barcode matched counts, allele counts,
Expand All @@ -649,6 +653,8 @@ def filter_allele_counts_by_pos(
rel_pos_end: rel_pos to end including (exclusive)
rel_pos_is_reporter: rel_pos_start and rel_pos_end is 0-based relative to reporter sequence start.
jaccard_threshold (float) --
reporter_length: Length of reporter sequence
reporter_right_flank_length: Length of right-flanking nucleotides following protospacer sequence in reporter.
"""
allele_count_df = self.uns[allele_uns_key].copy()
if rel_pos_is_reporter:
Expand All @@ -663,7 +669,10 @@ def filter_allele_counts_by_pos(
if "guide_len" not in self.guides.columns.tolist():
self.guides["guide_len"] = self.guides.sequence.map(len)
guide_start_pos = (
32 - 6 - self.guides.loc[allele_count_df.guide, "guide_len"].values
reporter_length
- 1
- reporter_right_flank_length
- self.guides.loc[allele_count_df.guide, "guide_len"].values
)
filtered_allele, filtered_edits = zip(
*[
Expand Down
7 changes: 5 additions & 2 deletions bean/plotting/allele_stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Plotting functions to describe allele/guide/edit stats for allele count information stored in ReporterScreen.uns[allele_df_key]."""

import numpy as np
import matplotlib.pyplot as plt

Expand Down Expand Up @@ -32,7 +33,9 @@ def plot_n_guides_per_edit(
+ list(map(lambda e: e.get_abs_edit(), a.nt_allele.edits))
)
elif allele_col == "allele":
a["edits"] = a[allele_col].map(lambda a: list(a.edits))
a["edits"] = a[allele_col].map(
lambda a: list(map(lambda e: e.get_abs_edit(), a.edits))
)
edits_df = a.explode("edits")[["guide", "edits"]]
edits_df["edits"] = edits_df.edits.map(str)
edits_df = edits_df.loc[~edits_df.edits.str.startswith("-"), :].drop_duplicates()
Expand Down Expand Up @@ -61,5 +64,5 @@ def plot_allele_stats(bdata, allele_df_keys, plot_save_path):
plot_n_alleles_per_guide(bdata, key, bdata.uns[key].columns[1], ax[0])
plot_n_guides_per_edit(bdata, key, bdata.uns[key].columns[1], ax[1])

#plt.tight_layout()
# plt.tight_layout()
fig.savefig(plot_save_path, bbox_inches="tight")
5 changes: 5 additions & 0 deletions bean/preprocessing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ def prepare_bdata(bdata: be.ReporterScreen, args, warn, prefix: str):
f"Some target column (bdata.guides[{args.target_col}]) value is null. Check your input file."
)
bdata = bdata[bdata.guides[args.target_col].argsort(), :]
if any(bdata.X.sum(axis=1) > 0):
warn(
f"Filtering out {sum(bdata.X.sum(axis=1) > 0)} gRNAs without any counts over all samples."
)
bdata = bdata[bdata.X.sum(axis=1) > 0, :]
n_no_support_targets, bdata = filter_no_info_target(
bdata,
condit_col=args.condition_col,
Expand Down
4 changes: 2 additions & 2 deletions docs/_create-screen.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
bean create-screen gRNA_library.csv sample_list.csv gRNA_counts_table.csv
```
## Input
* gRNA_library.csv ([example](https://github.com/pinellolab/crispr-bean/blob/main/tests/data/var_mini_guides.csv))
* sample_list.csv ([example](https://github.com/pinellolab/crispr-bean/blob/main/tests/data/var_mini_samples.csv))
* gRNA_library.csv ([`variant` screen example](https://github.com/pinellolab/crispr-bean/blob/main/tests/data/var_mini_guides.csv), [`tiling` screen example](https://github.com/pinellolab/crispr-bean/blob/main/tests/data/test_guide_info_tiling_chrom.csv))
* sample_list.csv ([`sorting` screen example](https://github.com/pinellolab/crispr-bean/blob/main/tests/data/sample_list_survival.csv), [`variant` screen example](https://github.com/pinellolab/crispr-bean/blob/main/tests/data/var_mini_samples.csv))
* gRNA_counts_table.csv: Table with gRNA ID in the first column and sample IDs as the column names (first row)
`gRNA_library.csv` and `sample_list.csv` should be formatted as :ref:`input`. ([example](https://github.com/pinellolab/crispr-bean/blob/main/tests/data/var_mini_counts.csv))
8 changes: 6 additions & 2 deletions docs/_tutorial_no_edit.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ bean qc \
bean run sorting variant \
${working_dir}/bean_count_${screen_id}_masked.h5ad \
-o ${working_dir}/ \
--uniform-edit `# As we have no edit information.` \
[--no-negative-control] `# If you don't have the negative control gRNAs.`
--uniform-edit --ignore-bcmatch `# As we have no edit/reporter information.` \
[--fit-negctrl [--negctrl-col target_group --negctrl-col-value NegCtrl]] `# If you have the negative control gRNAs.`
```
## Input file spec
* gRNA_info.csv: Should have `name`, `target` columns. You can also specify `target_group` column whose value indicate `PosCtrl`/`NegCtrl` for control gRNAs.
* sample_list.csv: Same requirement for the full run. See examples for [`sorting` screens](https://github.com/pinellolab/crispr-bean/blob/main/tests/data/var_mini_samples.csv) and [`survival` screens](https://github.com/pinellolab/crispr-bean/blob/main/tests/data/sample_list_survival.csv).
See the example input files [here](https://pinellolab.github.io/crispr-bean/create_screen.html).

0 comments on commit 58f1f08

Please sign in to comment.