Skip to content

Commit

Permalink
allow specific pseudocount for lfc calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
jykr committed Jun 25, 2024
1 parent 4d37e08 commit cac5d9e
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 21 deletions.
32 changes: 18 additions & 14 deletions perturb_tools/_framework/_ScreenModule.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@ def annotate_guides(
ref_seq_path,
)

def log_norm(self, output_layer="lognorm_counts", read_count_layer=None, pseudocount=1):
    """Store log-normalized read counts in ``self.layers``.

    Parameters
    ----------
    output_layer
        Layer key that receives the result when ``read_count_layer`` is
        ``None``.
    read_count_layer
        Key of an existing layer to normalize instead of ``self.X``. When
        given, the result is always written to
        ``f"lognorm_{read_count_layer}"`` and the ``output_layer`` argument
        is ignored.
    pseudocount
        Forwarded unchanged to ``_log_normalize_read_count``.
    """
    if read_count_layer is None:
        self.layers[output_layer] = _log_normalize_read_count(self.X, pseudocount)
    else:
        # The caller-supplied output_layer is deliberately overridden so the
        # derived layer name always mirrors its source layer.
        output_layer = f"lognorm_{read_count_layer}"
        self.layers[output_layer] = _log_normalize_read_count(
            self.layers[read_count_layer], pseudocount
        )

# TBD: mask ones with too low raw counts.
Expand All @@ -133,22 +133,22 @@ def log_fold_change(
sample2,
lognorm_counts_key="lognorm_counts",
name=False,
pseudocount: int = 1,
out_guides_suffix="lfc",
return_result=False,
):
"""
General module to calculate LFC across experimental conditions.
"""
if "lognorm" not in lognorm_counts_key:
warnings.warn(
"The layer specified must be log-normalized values using screen.log_norm()."
)

if lognorm_counts_key not in self.layers.keys():
raise ValueError(
"Specified normalized count isn't in your layer. First run screen.log_norm()."
)

if lognorm_counts_key == "lognorm_counts":
self.log_norm(pseudocount=pseudocount)
else:
if "lognorm_" not in lognorm_counts_key:
raise ValueError(f"{lognorm_counts_key} is not a lognorm layer- feed in 'lognorm_`layer_key`' as lognorm_counts_key.")
read_count_layer_key = lognorm_counts_key.split("lognorm_")[-1]
if read_count_layer_key not in self.layers:
raise ValueError(f"{read_count_layer_key} not in .layers - feed in 'lognorm_`layer_key`' as lognorm_counts_key.")
self.log_norm(output_layer=lognorm_counts_key, read_count_layer=read_count_layer_key, pseudocount=pseudocount)
sample1_idx = np.where(sample1 == self.samples.index)[0]
sample2_idx = np.where(sample2 == self.samples.index)[0]
if len(sample1_idx) != 1 or len(sample2_idx) != 1:
Expand Down Expand Up @@ -190,6 +190,7 @@ def log_fold_change_reps(
rep_col: Union[str, List[str]] = "replicate",
compare_col="sort",
out_guides_suffix="lfc",
pseudocount=1,
keep_result=False,
ignore_missing=False,
):
Expand Down Expand Up @@ -258,6 +259,7 @@ def log_fold_change_reps(
self.samples.index[cond1_idx].tolist()[0],
self.samples.index[cond2_idx].tolist()[0],
lognorm_counts_key=lognorm_counts_key,
pseudocount=pseudocount,
return_result=True,
)
)
Expand All @@ -281,9 +283,10 @@ def log_fold_change_agg(
cond2,
lognorm_counts_key="lognorm_counts",
agg_col="replicate",
compare_col="sort",
compare_col="condition",
out_guides_suffix="lfc",
agg_fn="median",
pseudocount=1,
name=None,
return_result=False,
keep_per_replicate=False,
Expand All @@ -295,6 +298,7 @@ def log_fold_change_agg(
rep_col=agg_col,
compare_col=compare_col,
out_guides_suffix=out_guides_suffix,
pseudocount=pseudocount,
keep_result=keep_per_replicate,
)

Expand Down
12 changes: 6 additions & 6 deletions perturb_tools/_normalization/_funcs/_read_count_norm.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
import numpy as np


def _read_count_normalize(X):
def _read_count_normalize(X, pseudocount: int = 1):
"""Read depth normalization by sample. Assumes samples are columns and guides are rows."""

return (X / np.nansum(X, axis=0)) * 1e6
return (X / np.nansum(X+pseudocount, axis=0)) * 1e6


def _log_transform_read_count(X):
def _log_transform_read_count(X, pseudocount: int = 1):
""""""
return np.log2(X + 1)
return np.log2(X + pseudocount)


def _log_normalize_read_count(X, pseudocount: int = 1):
    """Read-depth normalize ``X`` and then log-transform the result.

    Following the protocol written clearly, here:
    https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0170445#sec002
    (see Methods).

    ``pseudocount`` is passed to ``_read_count_normalize`` only.

    NOTE(review): the log transform is called without ``pseudocount`` and so
    uses its own default of 1. Confirm whether the pseudocount should also be
    forwarded there.
    """

    X_read_norm = _read_count_normalize(X, pseudocount)
    X_log_read_norm = _log_transform_read_count(X_read_norm)

    return X_log_read_norm
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name="perturb-tools",
version="0.3.4",
version="0.3.5",
python_requires=">3.7.0",
author=[
"Michael E. Vinyard - Harvard University - Massachussetts General Hospital - Broad Institute of MIT and Harvard",
Expand Down

0 comments on commit cac5d9e

Please sign in to comment.