ENH: natively support Graph in all Join Counts statistics (#295)
* join_counts

* local join counts

* remaining local join counts

* use private s0

* handle drop_islands

* compute s0 directly
martinfleis authored Jul 20, 2024
1 parent 6d47388 commit b183177
Showing 8 changed files with 191 additions and 168 deletions.
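For reference, a minimal sketch of what this change enables: the join count statistics accept either a classic libpysal W or the newer libpysal.graph.Graph as the weights argument. The lattice setup below is illustrative only, and Graph.from_W / lat2W are assumed to be available in a recent libpysal.

import numpy as np
from libpysal import graph, weights
from esda import Join_Counts

# synthetic binary variable on a 5x5 lattice
np.random.seed(12345)
y = np.random.binomial(1, 0.5, 25)

w = weights.lat2W(5, 5)       # classic W object
g = graph.Graph.from_W(w)     # the same structure as a Graph

jc_w = Join_Counts(y, w)      # pre-existing path
jc_g = Join_Counts(y, g)      # path added by this commit

# BB/WW/BW counts are expected to agree between the two paths
print(jc_w.bb, jc_g.bb)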
47 changes: 30 additions & 17 deletions esda/join_counts.py
@@ -2,12 +2,14 @@
Spatial autocorrelation for binary attributes
"""

__author__ = "Sergio J. Rey <srey@asu.edu> , Luc Anselin <luc.anselin@asu.edu>"

import warnings

import numpy as np
import pandas as pd
from libpysal.weights import W
from scipy.stats import chi2, chi2_contingency

from .crand import njit as _njit
@@ -18,7 +20,7 @@
PERMUTATIONS = 999


class Join_Counts(object):
class Join_Counts:
"""Binary Join Counts
@@ -27,8 +29,8 @@ class Join_Counts(object):
y : array
binary variable measured across n spatial units
w : W
spatial weights instance
w : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo-p_values
@@ -148,14 +150,24 @@ class Join_Counts(object):

def __init__(self, y, w, permutations=PERMUTATIONS, drop_islands=True):
y = np.asarray(y).flatten()
w.transformation = "b" # ensure we have binary weights
self.w = w
self.adj_list = self.w.to_adjlist(
remove_symmetric=False, drop_islands=drop_islands
)

if isinstance(w, W):
w.transform = "b" # ensure we have binary weights
self.w = w
self.adj_list = self.w.to_adjlist(
remove_symmetric=False, drop_islands=drop_islands
)
else:
w = w.transform("b")
self.w = w
if drop_islands:
self.adj_list = w.adjacency.drop(w.isolates).reset_index()
else:
self.adj_list = w.adjacency.reset_index()
self.y = y
self.permutations = permutations
self.J = w.s0 / 2.0
s0 = w.s0 if isinstance(w, W) else w._adjacency.sum()
self.J = s0 / 2.0
results = self.__calc(self.y)
self.bb = results[0]
self.ww = results[1]
@@ -206,9 +218,7 @@ def __init__(self, y, w, permutations=PERMUTATIONS, drop_islands=True):
self.sim_autocurr_pos
) ** 2 + (
self.autocorr_neg - np.mean(self.sim_autocurr_neg)
) ** 2 / np.mean(
self.sim_autocurr_pos
) ** 2
) ** 2 / np.mean(self.sim_autocurr_pos) ** 2
self.sim_autocorr_chi2 = 1 - chi2.cdf(stat, 1)

p_sim_bb = self.__pseudop(self.sim_bb, self.bb)
@@ -228,7 +238,10 @@ def __init__(self, y, w, permutations=PERMUTATIONS, drop_islands=True):

def __calc(self, z):
adj_list = self.adj_list
zseries = pd.Series(z, index=self.w.id_order)
zseries = pd.Series(
z,
index=self.w.id_order if hasattr(self.w, "id_order") else self.w.unique_ids,
)
focal = zseries.loc[adj_list.focal].values
neighbor = zseries.loc[adj_list.neighbor].values
sim = focal == neighbor
@@ -265,9 +278,9 @@ def by_col(
a pandas dataframe with a geometry column
cols : string or list of string
name or list of names of columns to use to compute the statistic
w : pysal weights object
a weights object aligned with the dataframe. If not provided, this
is searched for in the dataframe's metadata
w : W | Graph
spatial weights instance as W or Graph aligned with the dataframe. If not
provided, this is searched for in the dataframe's metadata
inplace : bool
a boolean denoting whether to operate on the dataframe inplace or to
return a series containing the results of the computation. If
@@ -310,7 +323,7 @@ def by_col(
outvals=outvals,
stat=cls,
swapname="bw",
**stat_kws
**stat_kws,
)


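The heart of the refactor in join_counts.py is that a Graph already stores its adjacency as a (focal, neighbor, weight) table, so W.to_adjlist is only needed on the W branch. A rough equivalence sketch, under the same lattice assumptions as above:

from libpysal import graph, weights

w = weights.lat2W(3, 3)
g = graph.Graph.from_W(w)

# W branch: build the adjacency list via the dedicated helper
adj_w = w.to_adjlist(remove_symmetric=False, drop_islands=True)

# Graph branch: reset the MultiIndexed adjacency Series into the same
# focal / neighbor / weight layout, dropping isolates when requested
adj_g = g.adjacency.drop(g.isolates).reset_index()

print(list(adj_w.columns), list(adj_g.columns))  # both expose focal, neighbor, weight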
25 changes: 16 additions & 9 deletions esda/join_counts_local.py
@@ -11,7 +11,6 @@


class Join_Counts_Local(BaseEstimator):

"""Univariate Local Join Count Statistic"""

def __init__(
@@ -29,10 +28,8 @@ def __init__(
Parameters
----------
connectivity : scipy.sparse matrix object
the connectivity structure describing
the relationships between observed units.
Need not be row-standardized.
connectivity : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo
p_values
@@ -119,8 +116,11 @@ def fit(self, y, n_jobs=1, permutations=999):

w = self.connectivity
# Fill the diagonal with 0s
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
if isinstance(w, weights.W):
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
else:
w = w.assign_self_weight(0).eliminate_zeros().transform("b")

keep_simulations = self.keep_simulations
n_jobs = self.n_jobs
@@ -152,8 +152,15 @@ def fit(self, y, n_jobs=1, permutations=999):
def _statistic(y, w, drop_islands):
# Create adjacency list. Note that remove_symmetric=False - this is
# different from the esda.Join_Counts() function.
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)
zseries = pd.Series(y, index=w.id_order)
if isinstance(w, weights.W):
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)
zseries = pd.Series(y, index=w.id_order)
else:
if drop_islands:
adj_list = w.adjacency.drop(w.isolates).reset_index()
else:
adj_list = w.adjacency.reset_index()
zseries = pd.Series(y, index=w.unique_ids)
focal = zseries.loc[adj_list.focal].values
neighbor = zseries.loc[adj_list.neighbor].values
LJC = (focal == 1) & (neighbor == 1)
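The local estimator follows the same pattern through its scikit-learn style API; a minimal usage sketch under the same lattice assumptions (the LJC and p_sim result attributes are not shown in these hunks, so treat those names as assumed):

import numpy as np
from libpysal import graph, weights
from esda import Join_Counts_Local

np.random.seed(12345)
y = np.random.binomial(1, 0.5, 25)

# a Graph built from the same 5x5 lattice used above
g = graph.Graph.from_W(weights.lat2W(5, 5))

# connectivity can now be a Graph as well as a W
ljc = Join_Counts_Local(connectivity=g).fit(y)
print(ljc.LJC, ljc.p_sim)  # assumed attribute names for the local counts and pseudo p-values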
32 changes: 20 additions & 12 deletions esda/join_counts_local_bv.py
@@ -11,7 +11,6 @@


class Join_Counts_Local_BV(BaseEstimator):

"""Univariate Local Join Count Statistic"""

def __init__(
@@ -29,10 +28,8 @@ def __init__(
Parameters
----------
connectivity : scipy.sparse matrix object
the connectivity structure describing
the relationships between observed units.
Need not be row-standardized.
connectivity : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo p_values
n_jobs : int
@@ -132,8 +129,11 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999):

w = self.connectivity
# Fill the diagonal with 0s
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
if isinstance(w, weights.W):
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
else:
w = w.assign_self_weight(0).eliminate_zeros().transform("b")

self.x = x
self.z = z
@@ -183,11 +183,19 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999):
def _statistic(x, z, w, case, drop_islands):
# Create adjacency list. Note that remove_symmetric=False - this is
# different from the esda.Join_Counts() function.
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)

# First, set up a series that maps the values to the weights table
zseries_x = pd.Series(x, index=w.id_order)
zseries_z = pd.Series(z, index=w.id_order)
if isinstance(w, weights.W):
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)
# First, set up a series that maps the values to the weights table
zseries_x = pd.Series(x, index=w.id_order)
zseries_z = pd.Series(z, index=w.id_order)
else:
if drop_islands:
adj_list = w.adjacency.drop(w.isolates).reset_index()
else:
adj_list = w.adjacency.reset_index()
# First, set up a series that maps the values to the weights table
zseries_x = pd.Series(x, index=w.unique_ids)
zseries_z = pd.Series(z, index=w.unique_ids)

# Map the values to the focal (i) values
focal_x = zseries_x.loc[adj_list.focal].values
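The same branch shows up in all three local estimators: a W has its diagonal zeroed with weights.util.fill_diagonal and is then set to binary in place, while a Graph reaches the equivalent state through chained methods that each return a new Graph. A standalone sketch of the two paths, under the same lattice assumptions as above:

from libpysal import graph, weights

w = weights.lat2W(5, 5)
g = graph.Graph.from_W(w)

# W path: zero any self-neighbor weights, then force binary weights in place
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"

# Graph path: overwrite any self-weights with zero, drop zero-weight links,
# then binarise; each call returns a new Graph rather than mutating in place
g = g.assign_self_weight(0).eliminate_zeros().transform("b")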
28 changes: 18 additions & 10 deletions esda/join_counts_local_mv.py
@@ -12,6 +12,7 @@

class Join_Counts_Local_MV(BaseEstimator):
"""Multivariate Local Join Count Statistic"""

def __init__(
self,
connectivity=None,
@@ -27,10 +28,8 @@ def __init__(
Parameters
----------
connectivity : scipy.sparse matrix object
the connectivity structure describing
the relationships between observed units.
Need not be row-standardized.
connectivity : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo p_values
n_jobs : int
@@ -110,8 +109,11 @@ def fit(self, variables, n_jobs=1, permutations=999):

w = self.connectivity
# Fill the diagonal with 0s
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
if isinstance(w, weights.W):
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
else:
w = w.assign_self_weight(0).eliminate_zeros().transform("b")

self.n = len(variables[0])
self.w = w
@@ -144,10 +146,16 @@ def fit(self, variables, n_jobs=1, permutations=999):
def _statistic(variables, w, drop_islands):
# Create adjacency list. Note that remove_symmetric=False -
# different from the esda.Join_Counts() function.
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)

# The zseries
zseries = [pd.Series(i, index=w.id_order) for i in variables]
if isinstance(w, weights.W):
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)
# The zseries
zseries = [pd.Series(i, index=w.id_order) for i in variables]
else:
if drop_islands:
adj_list = w.adjacency.drop(w.isolates).reset_index()
else:
adj_list = w.adjacency.reset_index()
zseries = [pd.Series(i, index=w.unique_ids) for i in variables]
# The focal values
focal = [zseries[i].loc[adj_list.focal].values for i in range(len(variables))]
# The neighbor values
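Finally, the multivariate estimator takes a list of binary variables rather than a single vector; a closing sketch under the same assumptions (result attribute names again assumed):

import numpy as np
from libpysal import graph, weights
from esda import Join_Counts_Local_MV

np.random.seed(12345)
x = np.random.binomial(1, 0.5, 25)
z = np.random.binomial(1, 0.5, 25)

g = graph.Graph.from_W(weights.lat2W(5, 5))

# co-location join counts for two binary variables on a Graph
mv = Join_Counts_Local_MV(connectivity=g).fit([x, z])
print(mv.LJC, mv.p_sim)  # assumed attribute names, as in the univariate example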
(diff truncated; remaining changed files not shown in this view)
