Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for dual editor #23

Merged
merged 1 commit into from
Apr 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 26 additions & 19 deletions bean/annotate/_supporting_fn.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from copy import deepcopy
from typing import List, Tuple, Union
from typing import List, Union, Dict, Optional
from tqdm.auto import tqdm
from ..framework.Edit import Edit, Allele
import numpy as np
from ..framework.Edit import Allele
from ..framework.AminoAcidEdit import CodingNoncodingAllele
import pandas as pd
from ..annotate.translate_allele import CDS, RefBaseMismatchException


def filter_allele_by_pos(
allele: Allele,
pos_start: int = None,
pos_end: int = None,
pos_start: Optional[Union[float, int]] = None,
pos_end: Optional[Union[float, int]] = None,
filter_rel_pos=True,
):
"""
Expand All @@ -24,6 +25,10 @@ def filter_allele_by_pos(
filtered_edits = 0
allele_filtered = deepcopy(allele)
if not (pos_start is None and pos_end is None):
if pos_start is None:
pos_start = -np.inf
if pos_end is None:
pos_end = np.inf
if filter_rel_pos:
for edit in allele.edits:
if not (edit.rel_pos >= pos_start and edit.rel_pos < pos_end):
Expand All @@ -34,17 +39,16 @@ def filter_allele_by_pos(
if not (edit.pos >= pos_start and edit.pos < pos_end):
filtered_edits += 1
allele_filtered.edits.remove(edit)

else:
print("No threshold specified") # TODO: warn
return (allele_filtered, filtered_edits)


def filter_allele_by_base(
allele: Allele,
allowed_base_changes: List[Tuple] = None,
allowed_ref_base: Union[List, str] = None,
allowed_alt_base: Union[List, str] = None,
allowed_base_changes: Optional[Dict[str, str]] = None,
allowed_ref_base: Optional[Union[List, str]] = None,
allowed_alt_base: Optional[Union[List, str]] = None,
):
"""
Filter alleles based on base changes (ref/alt bases) and return the filtered allele and
Expand All @@ -55,28 +59,29 @@ def filter_allele_by_base(
allowed_ref_base = [allowed_ref_base]
if isinstance(allowed_alt_base, str):
allowed_alt_base = [allowed_alt_base]
if (
not (allowed_ref_base is None and allowed_alt_base is None)
+ (allowed_base_changes is None)
== 1
):
if (allowed_ref_base is None and allowed_alt_base is None) + (
allowed_base_changes is None
) != 1:
print("No filters specified or misspecified filters.")
elif not allowed_base_changes is None:
elif allowed_base_changes is not None:
for edit in allele.edits.copy():
if not (edit.ref_base, edit.alt_base) in allowed_base_changes:
if (
edit.ref_base not in allowed_base_changes
or allowed_base_changes[edit.ref_base] != edit.alt_base
):
filtered_edits += 1
allele.edits.remove(edit)
elif not allowed_ref_base is None:
elif allowed_ref_base is not None:
for edit in allele.edits.copy():
if edit.ref_base not in allowed_ref_base:
filtered_edits += 1
allele.edits.remove(edit)
elif not allowed_alt_base is None and edit.alt_base not in allowed_alt_base:
elif allowed_alt_base is not None and edit.alt_base not in allowed_alt_base:
filtered_edits += 1
allele.edits.remove(edit)
else:
for edit in allele.edits.copy():
if edit.alt_base not in allowed_alt_base:
if edit.alt_base not in allowed_alt_base: # type: ignore
filtered_edits += 1
allele.edits.remove(edit)
return (allele, filtered_edits)
Expand Down Expand Up @@ -105,7 +110,9 @@ def map_alleles_to_filtered(
):
guide_filtered_allele_counts = filtered_allele_counts.loc[
filtered_allele_counts.guide == guide, :
].set_index("allele")
].set_index(
"allele"
) # type: ignore
guide_filtered_alleles = guide_filtered_allele_counts.index.tolist()
if len(guide_filtered_alleles) == 0:
pass
Expand Down
2 changes: 1 addition & 1 deletion bean/annotate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def parse_args(parser=None):
parser.add_argument(
"--filter-target-basechange",
"-b",
help="Only consider target edit (stored in bdata.uns['target_base_change'])",
help="Only consider target edit (stored in bdata.uns['target_base_changes'])",
action="store_true",
)
parser.add_argument(
Expand Down
9 changes: 6 additions & 3 deletions bean/cli/count_samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,11 @@ def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace):
args_dict["output_folder"] = os.path.join(args.output_folder, sample_id)

base_editing_map = {"A": "G", "C": "T"}
edited_from = args_dict["edited_base"]
edited_to = base_editing_map[edited_from]
try:
target_base_edits = {k: base_editing_map[k] for k in args_dict["edited_base"]}
except KeyError as e:
raise KeyError(args_dict["edited_base"]) from e

match_target_pos = args_dict["match_target_pos"]
if (
"guide_start_seqs_tbl" in args_dict
Expand Down Expand Up @@ -75,7 +78,7 @@ def count_sample(R1: str, R2: str, sample_id: str, args: argparse.Namespace):
raise ValueError(
f"File {counter.output_dir}.h5ad doesn't have alllele information stored."
) from exc
screen.get_edit_mat_from_uns(edited_from, edited_to, match_target_pos)
screen.get_edit_mat_from_uns(target_base_edits, match_target_pos)
info(
f"Reading already existing data for {sample_id} from \n\
{counter.output_dir}.h5ad"
Expand Down
17 changes: 8 additions & 9 deletions bean/cli/filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys

import logging
from itertools import product
import pandas as pd
import bean as be
import bean.annotate.filter_alleles as filter_alleles
Expand Down Expand Up @@ -109,24 +110,22 @@ def main(args):

if len(bdata.uns[allele_df_keys[-1]]) > 0 and not args.keep_indels:
filtered_key = f"{allele_df_keys[-1]}_noindels"
info(f"Filtering out indels...")
info("Filtering out indels...")
bdata.uns[filtered_key] = bdata.filter_allele_counts_by_base(
["A", "T", "G", "C"],
["A", "T", "G", "C"],
{k: v for k, v in product(["A", "C", "T", "G"], ["A", "C", "T", "G"])},
map_to_filtered=False,
allele_uns_key=allele_df_keys[-1],
).reset_index(drop=True)
info(f"Filtered down to {len(bdata.uns[filtered_key])} alleles.")
allele_df_keys.append(filtered_key)

if len(bdata.uns[allele_df_keys[-1]]) > 0 and args.filter_target_basechange:
filtered_key = (
f"{allele_df_keys[-1]}_{bdata.base_edited_from}.{bdata.base_edited_to}"
)
info(f"Filtering out non-{bdata.uns['target_base_change']} edits...")
if "target_base_changes" not in bdata.uns and "target_base_change" in bdata.uns:
bdata.uns["target_base_changes"] = bdata.uns["target_base_change"]
filtered_key = f"{allele_df_keys[-1]}_{bdata.uns['target_base_changes']}"
info(f"Filtering out non-{bdata.uns['target_base_changes']} edits...")
bdata.uns[filtered_key] = bdata.filter_allele_counts_by_base(
bdata.base_edited_from,
bdata.base_edited_to,
bdata.target_base_changes,
map_to_filtered=False,
allele_uns_key=allele_df_keys[-1],
).reset_index(drop=True)
Expand Down
10 changes: 5 additions & 5 deletions bean/framework/Edit.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from __future__ import annotations
from typing import Iterable
from typing import Iterable, Optional
import numpy as np
import re
from ..utils.arithmetric import jaccard
Expand All @@ -12,10 +12,10 @@ class Edit:
def __init__(
self,
rel_pos: int,
ref_base: chr,
alt_base: chr,
chrom: str = None,
offset: int = None,
ref_base: str,
alt_base: str,
chrom: Optional[str] = None,
offset: Optional[int] = None,
strand: int = 1,
unique_identifier=None,
):
Expand Down
Loading
Loading