ENH: natively support Graph in all Join Counts statistics (#295)
* join_counts

* local join counts

* remaining local join counts

* use private s0

* handle drop_islands

* compute s0 directly
martinfleis authored Jul 20, 2024
1 parent 6d47388 commit b183177
Showing 8 changed files with 191 additions and 168 deletions.
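For reference, a minimal sketch of what this change enables: the join count statistics accept either a classic libpysal W or the newer libpysal.graph.Graph as the weights argument. The lattice setup below is illustrative only, and Graph.from_W / lat2W are assumed to be available in a recent libpysal.

import numpy as np
from libpysal import graph, weights
from esda import Join_Counts

# synthetic binary variable on a 5x5 lattice
np.random.seed(12345)
y = np.random.binomial(1, 0.5, 25)

w = weights.lat2W(5, 5)       # classic W object
g = graph.Graph.from_W(w)     # the same structure as a Graph

jc_w = Join_Counts(y, w)      # pre-existing path
jc_g = Join_Counts(y, g)      # path added by this commit

# BB/WW/BW counts are expected to agree between the two paths
print(jc_w.bb, jc_g.bb)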
47 changes: 30 additions & 17 deletions esda/join_counts.py
@@ -2,12 +2,14 @@
Spatial autocorrelation for binary attributes
"""

__author__ = "Sergio J. Rey <srey@asu.edu> , Luc Anselin <luc.anselin@asu.edu>"

import warnings

import numpy as np
import pandas as pd
from libpysal.weights import W
from scipy.stats import chi2, chi2_contingency

from .crand import njit as _njit
@@ -18,7 +20,7 @@
PERMUTATIONS = 999


class Join_Counts(object):
class Join_Counts:
"""Binary Join Counts
@@ -27,8 +29,8 @@ class Join_Counts(object):
y : array
binary variable measured across n spatial units
w : W
spatial weights instance
w : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo-p_values
@@ -148,14 +150,24 @@ class Join_Counts(object):

def __init__(self, y, w, permutations=PERMUTATIONS, drop_islands=True):
y = np.asarray(y).flatten()
w.transformation = "b" # ensure we have binary weights
self.w = w
self.adj_list = self.w.to_adjlist(
remove_symmetric=False, drop_islands=drop_islands
)

if isinstance(w, W):
w.transform = "b" # ensure we have binary weights
self.w = w
self.adj_list = self.w.to_adjlist(
remove_symmetric=False, drop_islands=drop_islands
)
else:
w = w.transform("b")
self.w = w
if drop_islands:
self.adj_list = w.adjacency.drop(w.isolates).reset_index()
else:
self.adj_list = w.adjacency.reset_index()
self.y = y
self.permutations = permutations
self.J = w.s0 / 2.0
s0 = w.s0 if isinstance(w, W) else w._adjacency.sum()
self.J = s0 / 2.0
results = self.__calc(self.y)
self.bb = results[0]
self.ww = results[1]
@@ -206,9 +218,7 @@ def __init__(self, y, w, permutations=PERMUTATIONS, drop_islands=True):
self.sim_autocurr_pos
) ** 2 + (
self.autocorr_neg - np.mean(self.sim_autocurr_neg)
) ** 2 / np.mean(
self.sim_autocurr_pos
) ** 2
) ** 2 / np.mean(self.sim_autocurr_pos) ** 2
self.sim_autocorr_chi2 = 1 - chi2.cdf(stat, 1)

p_sim_bb = self.__pseudop(self.sim_bb, self.bb)
@@ -228,7 +238,10 @@ def __init__(self, y, w, permutations=PERMUTATIONS, drop_islands=True):

def __calc(self, z):
adj_list = self.adj_list
zseries = pd.Series(z, index=self.w.id_order)
zseries = pd.Series(
z,
index=self.w.id_order if hasattr(self.w, "id_order") else self.w.unique_ids,
)
focal = zseries.loc[adj_list.focal].values
neighbor = zseries.loc[adj_list.neighbor].values
sim = focal == neighbor
@@ -265,9 +278,9 @@ def by_col(
a pandas dataframe with a geometry column
cols : string or list of string
name or list of names of columns to use to compute the statistic
w : pysal weights object
a weights object aligned with the dataframe. If not provided, this
is searched for in the dataframe's metadata
w : W | Graph
spatial weights instance as W or Graph aligned with the dataframe. If not
provided, this is searched for in the dataframe's metadata
inplace : bool
a boolean denoting whether to operate on the dataframe inplace or to
return a series containing the results of the computation. If
@@ -310,7 +323,7 @@ def by_col(
outvals=outvals,
stat=cls,
swapname="bw",
**stat_kws
**stat_kws,
)


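The heart of the refactor in join_counts.py is that a Graph already stores its adjacency as a (focal, neighbor, weight) table, so W.to_adjlist is only needed on the W branch. A rough equivalence sketch, under the same lattice assumptions as above:

from libpysal import graph, weights

w = weights.lat2W(3, 3)
g = graph.Graph.from_W(w)

# W branch: build the adjacency list via the dedicated helper
adj_w = w.to_adjlist(remove_symmetric=False, drop_islands=True)

# Graph branch: reset the MultiIndexed adjacency Series into the same
# focal / neighbor / weight layout, dropping isolates when requested
adj_g = g.adjacency.drop(g.isolates).reset_index()

print(list(adj_w.columns), list(adj_g.columns))  # both expose focal, neighbor, weight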
25 changes: 16 additions & 9 deletions esda/join_counts_local.py
@@ -11,7 +11,6 @@


class Join_Counts_Local(BaseEstimator):

"""Univariate Local Join Count Statistic"""

def __init__(
@@ -29,10 +28,8 @@ def __init__(
Parameters
----------
connectivity : scipy.sparse matrix object
the connectivity structure describing
the relationships between observed units.
Need not be row-standardized.
connectivity : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo
p_values
@@ -119,8 +116,11 @@ def fit(self, y, n_jobs=1, permutations=999):

w = self.connectivity
# Fill the diagonal with 0s
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
if isinstance(w, weights.W):
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
else:
w = w.assign_self_weight(0).eliminate_zeros().transform("b")

keep_simulations = self.keep_simulations
n_jobs = self.n_jobs
@@ -152,8 +152,15 @@ def fit(self, y, n_jobs=1, permutations=999):
def _statistic(y, w, drop_islands):
# Create adjacency list. Note that remove_symmetric=False - this is
# different from the esda.Join_Counts() function.
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)
zseries = pd.Series(y, index=w.id_order)
if isinstance(w, weights.W):
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)
zseries = pd.Series(y, index=w.id_order)
else:
if drop_islands:
adj_list = w.adjacency.drop(w.isolates).reset_index()
else:
adj_list = w.adjacency.reset_index()
zseries = pd.Series(y, index=w.unique_ids)
focal = zseries.loc[adj_list.focal].values
neighbor = zseries.loc[adj_list.neighbor].values
LJC = (focal == 1) & (neighbor == 1)
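The local estimator follows the same pattern through its scikit-learn style API; a minimal usage sketch under the same lattice assumptions (the LJC and p_sim result attributes are not shown in these hunks, so treat those names as assumed):

import numpy as np
from libpysal import graph, weights
from esda import Join_Counts_Local

np.random.seed(12345)
y = np.random.binomial(1, 0.5, 25)

# a Graph built from the same 5x5 lattice used above
g = graph.Graph.from_W(weights.lat2W(5, 5))

# connectivity can now be a Graph as well as a W
ljc = Join_Counts_Local(connectivity=g).fit(y)
print(ljc.LJC, ljc.p_sim)  # assumed attribute names for the local counts and pseudo p-values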
32 changes: 20 additions & 12 deletions esda/join_counts_local_bv.py
@@ -11,7 +11,6 @@


class Join_Counts_Local_BV(BaseEstimator):

"""Univariate Local Join Count Statistic"""

def __init__(
@@ -29,10 +28,8 @@ def __init__(
Parameters
----------
connectivity : scipy.sparse matrix object
the connectivity structure describing
the relationships between observed units.
Need not be row-standardized.
connectivity : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo p_values
n_jobs : int
@@ -132,8 +129,11 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999):

w = self.connectivity
# Fill the diagonal with 0s
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
if isinstance(w, weights.W):
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
else:
w = w.assign_self_weight(0).eliminate_zeros().transform("b")

self.x = x
self.z = z
@@ -183,11 +183,19 @@ def fit(self, x, z, case="CLC", n_jobs=1, permutations=999):
def _statistic(x, z, w, case, drop_islands):
# Create adjacency list. Note that remove_symmetric=False - this is
# different from the esda.Join_Counts() function.
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)

# First, set up a series that maps the values to the weights table
zseries_x = pd.Series(x, index=w.id_order)
zseries_z = pd.Series(z, index=w.id_order)
if isinstance(w, weights.W):
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)
# First, set up a series that maps the values to the weights table
zseries_x = pd.Series(x, index=w.id_order)
zseries_z = pd.Series(z, index=w.id_order)
else:
if drop_islands:
adj_list = w.adjacency.drop(w.isolates).reset_index()
else:
adj_list = w.adjacency.reset_index()
# First, set up a series that maps the values to the weights table
zseries_x = pd.Series(x, index=w.unique_ids)
zseries_z = pd.Series(z, index=w.unique_ids)

# Map the values to the focal (i) values
focal_x = zseries_x.loc[adj_list.focal].values
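The same branch shows up in all three local estimators: a W has its diagonal zeroed with weights.util.fill_diagonal and is then set to binary in place, while a Graph reaches the equivalent state through chained methods that each return a new Graph. A standalone sketch of the two paths, under the same lattice assumptions as above:

from libpysal import graph, weights

w = weights.lat2W(5, 5)
g = graph.Graph.from_W(w)

# W path: zero any self-neighbor weights, then force binary weights in place
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"

# Graph path: overwrite any self-weights with zero, drop zero-weight links,
# then binarise; each call returns a new Graph rather than mutating in place
g = g.assign_self_weight(0).eliminate_zeros().transform("b")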
28 changes: 18 additions & 10 deletions esda/join_counts_local_mv.py
@@ -12,6 +12,7 @@

class Join_Counts_Local_MV(BaseEstimator):
"""Multivariate Local Join Count Statistic"""

def __init__(
self,
connectivity=None,
@@ -27,10 +28,8 @@ def __init__(
Parameters
----------
connectivity : scipy.sparse matrix object
the connectivity structure describing
the relationships between observed units.
Need not be row-standardized.
connectivity : W | Graph
spatial weights instance as W or Graph aligned with y
permutations : int
number of random permutations for calculation of pseudo p_values
n_jobs : int
@@ -110,8 +109,11 @@ def fit(self, variables, n_jobs=1, permutations=999):

w = self.connectivity
# Fill the diagonal with 0s
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
if isinstance(w, weights.W):
w = weights.util.fill_diagonal(w, val=0)
w.transform = "b"
else:
w = w.assign_self_weight(0).eliminate_zeros().transform("b")

self.n = len(variables[0])
self.w = w
@@ -144,10 +146,16 @@ def fit(self, variables, n_jobs=1, permutations=999):
def _statistic(variables, w, drop_islands):
# Create adjacency list. Note that remove_symmetric=False -
# different from the esda.Join_Counts() function.
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)

# The zseries
zseries = [pd.Series(i, index=w.id_order) for i in variables]
if isinstance(w, weights.W):
adj_list = w.to_adjlist(remove_symmetric=False, drop_islands=drop_islands)
# The zseries
zseries = [pd.Series(i, index=w.id_order) for i in variables]
else:
if drop_islands:
adj_list = w.adjacency.drop(w.isolates).reset_index()
else:
adj_list = w.adjacency.reset_index()
zseries = [pd.Series(i, index=w.unique_ids) for i in variables]
# The focal values
focal = [zseries[i].loc[adj_list.focal].values for i in range(len(variables))]
# The neighbor values
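Finally, the multivariate estimator takes a list of binary variables rather than a single vector; a closing sketch under the same assumptions (result attribute names again assumed):

import numpy as np
from libpysal import graph, weights
from esda import Join_Counts_Local_MV

np.random.seed(12345)
x = np.random.binomial(1, 0.5, 25)
z = np.random.binomial(1, 0.5, 25)

g = graph.Graph.from_W(weights.lat2W(5, 5))

# co-location join counts for two binary variables on a Graph
mv = Join_Counts_Local_MV(connectivity=g).fit([x, z])
print(mv.LJC, mv.p_sim)  # assumed attribute names, as in the univariate example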
(diff truncated; remaining changed files not shown in this view)
