Skip to content

Commit

Permalink
Skip correlation calculation in ctx if it already exists in the adj
Browse files Browse the repository at this point in the history
input file
  • Loading branch information
cflerin committed Nov 24, 2020
1 parent ea9e6de commit 436561f
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions src/pyscenic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,14 +261,18 @@ def iter_modules(adjc, context):
# Relationship between TF and its target, i.e. activator or repressor, is derived using the original expression
# profiles. The Pearson product-moment correlation coefficient is used to derive this information.

# Add correlation column and create two disjoint sets of adjacencies.
LOGGER.info("Calculating Pearson correlations.")
# test for genes present in the adjacencies but not present in the expression matrix:
unique_adj_genes = set(adjacencies[COLUMN_NAME_TF]).union(set(adjacencies[COLUMN_NAME_TARGET])) - set(ex_mtx.columns)
assert len(unique_adj_genes)==0, f"Found {len(unique_adj_genes)} genes present in the network (adjacencies) output, but missing from the expression matrix. Is this a different gene expression matrix?"
LOGGER.warn(f"Note on correlation calculation: the default behaviour for calculating the correlations has changed after pySCENIC verion 0.9.16. Previously, the default was to calculate the correlation between a TF and target gene using only cells with non-zero expression values (mask_dropouts=True). The current default is now to use all cells to match the behavior of the R verision of SCENIC. The original settings can be retained by setting 'rho_mask_dropouts=True' in the modules_from_adjacencies function, or '--mask_dropouts' from the CLI.\n\tDropout masking is currently set to [{rho_mask_dropouts}].")
adjacencies = add_correlation(adjacencies, ex_mtx,
rho_threshold=rho_threshold, mask_dropouts=rho_mask_dropouts)
if not {'regulation', 'rho'}.issubset(adjacencies.columns):
# Add correlation column and create two disjoint sets of adjacencies.
LOGGER.info("Calculating Pearson correlations.")
# test for genes present in the adjacencies but not present in the expression matrix:
unique_adj_genes = set(adjacencies[COLUMN_NAME_TF]).union(set(adjacencies[COLUMN_NAME_TARGET])) - set(ex_mtx.columns)
assert len(unique_adj_genes)==0, f"Found {len(unique_adj_genes)} genes present in the network (adjacencies) output, but missing from the expression matrix. Is this a different gene expression matrix?"
LOGGER.warn(f"Note on correlation calculation: the default behaviour for calculating the correlations has changed after pySCENIC verion 0.9.16. Previously, the default was to calculate the correlation between a TF and target gene using only cells with non-zero expression values (mask_dropouts=True). The current default is now to use all cells to match the behavior of the R verision of SCENIC. The original settings can be retained by setting 'rho_mask_dropouts=True' in the modules_from_adjacencies function, or '--mask_dropouts' from the CLI.\n\tDropout masking is currently set to [{rho_mask_dropouts}].")
adjacencies = add_correlation(adjacencies, ex_mtx,
rho_threshold=rho_threshold, mask_dropouts=rho_mask_dropouts)
else:
LOGGER.info("Using existing Pearson correlations from the adjacencies file.")

activating_modules = adjacencies[adjacencies[COLUMN_NAME_REGULATION] > 0.0]
if keep_only_activating:
modules_iter = iter_modules(activating_modules, frozenset([ACTIVATING_MODULE]))
Expand Down

0 comments on commit 436561f

Please sign in to comment.