From e198ae2cc0d7c505b6fa744bcccf7db7bfc94646 Mon Sep 17 00:00:00 2001 From: adamgayoso Date: Sun, 8 May 2022 11:58:44 -0700 Subject: [PATCH 1/7] add sparse distance mat support --- hotspot/hotspot.py | 14 +++++++++----- hotspot/knn.py | 8 ++++++-- pyproject.toml | 2 +- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/hotspot/hotspot.py b/hotspot/hotspot.py index a31b9df..197ba10 100644 --- a/hotspot/hotspot.py +++ b/hotspot/hotspot.py @@ -91,10 +91,14 @@ def __init__( ) if distances is not None: - assert not issparse(distances) - distances = pd.DataFrame( - distances, index=adata.obs_names, columns=adata.obs_names - ) + if issparse(distances): + distances = pd.DataFrame.sparse.from_spmatrix( + distances, index=adata.obs_names, columns=adata.obs_names + ) + else: + distances = pd.DataFrame( + distances, index=adata.obs_names, columns=adata.obs_names + ) if latent is not None: latent = pd.DataFrame(latent, index=adata.obs_names) @@ -260,7 +264,7 @@ def legacy_init( input_adata.obs[tc_key] = umi_counts.values dkey = "distances" if distances is not None: - input_adata.obsp[dkey] = np.asarray(distances) + input_adata.obsp[dkey] = distances dist_input = True else: dist_input = False diff --git a/hotspot/knn.py b/hotspot/knn.py index 0a84a12..8a6845b 100644 --- a/hotspot/knn.py +++ b/hotspot/knn.py @@ -61,10 +61,14 @@ def neighbors_and_weights_from_distances( weights: pandas.Dataframe num_cells x n_neighbors """ - + # detects if distances are sparse in the DataFrame + try: + dist_mat = distances.sparse.to_coo() + except AttributeError: + dist_mat = distances.values nbrs = NearestNeighbors( n_neighbors=n_neighbors, algorithm="brute", metric="precomputed" - ).fit(distances.values) + ).fit(dist_mat) dist, ind = nbrs.kneighbors() weights = compute_weights(dist, neighborhood_factor=neighborhood_factor) diff --git a/pyproject.toml b/pyproject.toml index 30c30d5..8caa86b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ packages = [ {include = "hotspot"}, ] readme = "README.md" -version = "1.1" +version = "1.1.1" [tool.poetry.dependencies] pytest = {version = ">=5.0", optional = true} From d5f1debbc54e9881df5311e8a7ab5b1672e638c8 Mon Sep 17 00:00:00 2001 From: adamgayoso Date: Sun, 8 May 2022 12:01:56 -0700 Subject: [PATCH 2/7] add test --- pyproject.toml | 3 ++- tests/test_validations.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8caa86b..f3818f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,10 +50,11 @@ sphinx-book-theme = {version= ">=0.2.0", optional = true} nbsphinx = {version = "*", optional = true} sphinx = {version = ">=4.1", optional = true} ipython = {version = "*", optional = true} +scanpy = {version = "*", optional = true} [tool.poetry.extras] -test = ["pytest"] +test = ["pytest", "scanpy"] docs=["sphinx-book-theme", "nbsphinx", "sphinx>", "ipython"] [tool.poetry.dev-dependencies] diff --git a/tests/test_validations.py b/tests/test_validations.py index e4da293..d62b507 100644 --- a/tests/test_validations.py +++ b/tests/test_validations.py @@ -4,6 +4,7 @@ from hotspot import Hotspot import anndata import pytest +import scanpy as sc from scipy.sparse import csc_matrix @@ -73,6 +74,18 @@ def test_models(): assert isinstance(hs.module_scores, pd.DataFrame) assert (hs.module_scores.index == gene_exp.columns).all() + # test precomputed distance matrix + sc.pp.neighbors(adata, use_rep="latent", n_neighbors=30) + hs = Hotspot( + adata, + model="normal", + umi_counts_obs_key="umi_counts", + layer_key="sparse", + distances_obsp_key="distances" + ) + hs.create_knn_graph(False, n_neighbors=30) + hs.compute_autocorrelations() + def test_filter_genes(): """ From 0f6abd5ba096b250677c13964909dbbd15033b9f Mon Sep 17 00:00:00 2001 From: adamgayoso Date: Sun, 8 May 2022 12:04:29 -0700 Subject: [PATCH 3/7] remove erroneous docs sentence --- docs/source/hotspot.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/source/hotspot.rst b/docs/source/hotspot.rst index 26e7557..e926387 100644 --- a/docs/source/hotspot.rst +++ b/docs/source/hotspot.rst @@ -1,8 +1,5 @@ Function Reference ****************** - -Where does this stuff get included? - .. autoclass:: hotspot.Hotspot :members: From 6aeffb1de729f862f49192347b67e91bcc4ed4a3 Mon Sep 17 00:00:00 2001 From: adamgayoso Date: Sun, 8 May 2022 12:07:53 -0700 Subject: [PATCH 4/7] no dataframe for distances --- hotspot/hotspot.py | 10 ---------- hotspot/knn.py | 11 +++-------- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/hotspot/hotspot.py b/hotspot/hotspot.py index 197ba10..88c5256 100644 --- a/hotspot/hotspot.py +++ b/hotspot/hotspot.py @@ -90,16 +90,6 @@ def __init__( "Both `distances_obsp_key` and `tree` provided - only one of these should be provided." ) - if distances is not None: - if issparse(distances): - distances = pd.DataFrame.sparse.from_spmatrix( - distances, index=adata.obs_names, columns=adata.obs_names - ) - else: - distances = pd.DataFrame( - distances, index=adata.obs_names, columns=adata.obs_names - ) - if latent is not None: latent = pd.DataFrame(latent, index=adata.obs_names) diff --git a/hotspot/knn.py b/hotspot/knn.py index 8a6845b..081b2c0 100644 --- a/hotspot/knn.py +++ b/hotspot/knn.py @@ -45,7 +45,7 @@ def neighbors_and_weights(data, n_neighbors=30, neighborhood_factor=3, approx_ne def neighbors_and_weights_from_distances( - distances, n_neighbors=30, neighborhood_factor=3 + distances, cell_index, n_neighbors=30, neighborhood_factor=3 ): """ Computes nearest neighbors and associated weights using @@ -61,19 +61,14 @@ def neighbors_and_weights_from_distances( weights: pandas.Dataframe num_cells x n_neighbors """ - # detects if distances are sparse in the DataFrame - try: - dist_mat = distances.sparse.to_coo() - except AttributeError: - dist_mat = distances.values nbrs = NearestNeighbors( n_neighbors=n_neighbors, algorithm="brute", metric="precomputed" - ).fit(dist_mat) + ).fit(distances) dist, ind = nbrs.kneighbors() weights = compute_weights(dist, neighborhood_factor=neighborhood_factor) - ind = pd.DataFrame(ind, index=distances.index) + ind = pd.DataFrame(ind, index=cell_index) neighbors = ind weights = pd.DataFrame( weights, index=neighbors.index, columns=neighbors.columns From b3c81a6917d730e7ab51fb4ea1f5b805be027fb9 Mon Sep 17 00:00:00 2001 From: adamgayoso Date: Sun, 8 May 2022 16:26:49 -0700 Subject: [PATCH 5/7] fix neighbors when precomputed --- hotspot/knn.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/hotspot/knn.py b/hotspot/knn.py index 081b2c0..0392284 100644 --- a/hotspot/knn.py +++ b/hotspot/knn.py @@ -5,6 +5,7 @@ from numba import jit from tqdm import tqdm from pynndescent import NNDescent +import warnings def neighbors_and_weights(data, n_neighbors=30, neighborhood_factor=3, approx_neighbors=True): @@ -26,8 +27,9 @@ def neighbors_and_weights(data, n_neighbors=30, neighborhood_factor=3, approx_ne coords = data.values if approx_neighbors: - index = NNDescent(coords, n_neighbors=n_neighbors) - ind, dist = index.neighbor_graph + # pynndescent first neighbor is self, unlike sklearn + index = NNDescent(coords, n_neighbors=n_neighbors + 1) + ind[:, 1:], dist[:, 1:] = index.neighbor_graph else: nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm="ball_tree").fit(coords) @@ -64,7 +66,13 @@ def neighbors_and_weights_from_distances( nbrs = NearestNeighbors( n_neighbors=n_neighbors, algorithm="brute", metric="precomputed" ).fit(distances) - dist, ind = nbrs.kneighbors() + try: + dist, ind = nbrs.kneighbors() + # already is a neighbors graph + except ValueError: + nn = np.asarray((distances[0] > 0).sum()) + warnings.warn(f"Provided cell-cell distance graph is likely a {nn}-neighbors graph. Using {nn} precomputed neighbors.") + dist, ind = nbrs.kneighbors(n_neighbors=nn-1) weights = compute_weights(dist, neighborhood_factor=neighborhood_factor) From 2fca1bc0341c9b57a19e10895b0508926c6a86b4 Mon Sep 17 00:00:00 2001 From: adamgayoso Date: Sun, 8 May 2022 16:30:26 -0700 Subject: [PATCH 6/7] fixes --- hotspot/knn.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hotspot/knn.py b/hotspot/knn.py index 0392284..436acc2 100644 --- a/hotspot/knn.py +++ b/hotspot/knn.py @@ -29,7 +29,8 @@ def neighbors_and_weights(data, n_neighbors=30, neighborhood_factor=3, approx_ne if approx_neighbors: # pynndescent first neighbor is self, unlike sklearn index = NNDescent(coords, n_neighbors=n_neighbors + 1) - ind[:, 1:], dist[:, 1:] = index.neighbor_graph + ind, dist = index.neighbor_graph + ind, dist = ind[:, 1:], dist[:, 1:] else: nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm="ball_tree").fit(coords) From b3da53644f7b2f039916ea22033cd9fb5664de8d Mon Sep 17 00:00:00 2001 From: adamgayoso Date: Sun, 8 May 2022 16:42:28 -0700 Subject: [PATCH 7/7] finalize --- hotspot/knn.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hotspot/knn.py b/hotspot/knn.py index 436acc2..82d7885 100644 --- a/hotspot/knn.py +++ b/hotspot/knn.py @@ -64,6 +64,9 @@ def neighbors_and_weights_from_distances( weights: pandas.Dataframe num_cells x n_neighbors """ + if isinstance(distances, pd.DataFrame): + distances = distances.values + nbrs = NearestNeighbors( n_neighbors=n_neighbors, algorithm="brute", metric="precomputed" ).fit(distances)