Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

finalise contiguity graph builders #566

Merged
merged 1 commit into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 36 additions & 24 deletions libpysal/graph/_contiguity.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,29 +44,19 @@ def _vertex_set_intersection(geoms, rook=True, ids=None, by_perimeter=False):
graph[idx] = set([idx])

# get all of the vertices for the input
assert (
~geoms.geom_type.str.endswith("Point")
).any(), "this graph type is only well-defined for line and polygon geometries."

## TODO: this induces a "fake" edge between the closing and opening point
## of two multipolygon parts. This should never enter into calculations,
## *unless* two multipolygons share opening and closing points in the
## same order and part order. Still, this should be fixed by ensuring that
## only adjacent points of the same part of the same polygon are used.
## this bug also exists in the existing contiguity builder.

# geoms = geoms.explode()
# multipolygon_ixs = geoms.get_level_values(0)
# ids = ids[multipolygon_ixs]
# geoms = geoms.geometry
vertices, offsets = shapely.get_coordinates(geoms.geometry, return_index=True)
# use offsets from exploded geoms to create edges to avoid a phantom edge between
# parts of multipolygon
_, single_part_offsets = shapely.get_coordinates(
geoms.geometry.explode(ignore_index=True), return_index=True
)
# initialise the hashmap we want to invert
vert_to_geom = defaultdict(set)

# populate the hashmap we intend to invert
if rook:
for i, vertex in enumerate(vertices[:-1]):
if offsets[i] != offsets[i + 1]:
if single_part_offsets[i] != single_part_offsets[i + 1]:
continue
edge = tuple(sorted([tuple(vertex), tuple(vertices[i + 1])]))
# edge to {polygons, with, this, edge}
Expand Down Expand Up @@ -143,6 +133,35 @@ def _queen(geoms, ids=None, by_perimeter=False):


def _rook(geoms, ids=None, by_perimeter=False):
"""
Construct rook contiguity using point-set relations.

Rook contiguity occurs when two polygons touch over at least one edge.
Overlapping polygons will not be considered as neighboring
under this rule, since contiguity is strictly planar.

Parameters
----------
geoms : geopandas.GeoDataFrame, geopandas.GeoSeries, numpy.array
The container for the geometries to compute contiguity. Regardless of
the containing type, the geometries within the container must be Polygons
or MultiPolygons.
ids : numpy.ndarray (default: None)
names to use for indexing the graph constructed from geoms. If None (default),
an index is extracted from `geoms`. If `geoms` has no index, a pandas.RangeIndex
is constructed.
by_perimeter : bool (default: False)
whether to compute perimeter-weighted contiguity. By default, this returns
the raw length of perimeter overlap between contiguous polygons or lines.
In the case of LineString/MultiLineString input geoms, this is likely to
result in empty weights, where all observations are isolates.

Returns
-------
(heads, tails, weights) : three vectors describing the links in the
rook contiguity graph, with islands represented as a self-loop with
zero weight.
"""
_, ids, geoms = _validate_geometry_input(
geoms, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES
)
Expand All @@ -160,13 +179,6 @@ def _rook(geoms, ids=None, by_perimeter=False):
return _resolve_islands(heads, tails, ids, weights)


_rook.__doc__ = (
_queen.__doc__.replace("queen", "rook")
.replace("Queen", "Rook")
.replace("exactly at a point", "over at least one edge")
)


def _perimeter_weights(geoms, heads, tails):
"""
Compute the perimeter of neighbor pairs for edges describing a contiguity graph.
Expand All @@ -180,7 +192,7 @@ def _perimeter_weights(geoms, heads, tails):
on input data are expected.
"""
intersection = shapely.intersection(geoms[heads].values, geoms[tails].values)
geom_types = shapely.get_type_id(intersection)
geom_types = shapely.get_type_id(shapely.get_parts(intersection))

# check if the intersection resulted in (Multi)Polygon
if numpy.isin(geom_types, [3, 6]).any():
Expand Down
74 changes: 43 additions & 31 deletions libpysal/graph/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
import shapely
from itertools import permutations


def _sparse_to_arrays(sparray, ids=None):
if ids is None:
maxdim = np.maximum(*sparray.shape)
ids = np.arange(maxdim)
head_ix, tail_ix = sparray.nonzero()
return ids[head_ix], ids[tail_ix], sparray.data


def _jitter_geoms(coordinates, geoms, seed=None):
"""
Jitter geometries based on the smallest required movements to induce
Expand All @@ -30,16 +32,17 @@ def _jitter_geoms(coordinates, geoms, seed=None):
# the resolution is the approximate difference between two floats
# that can be resolved at the given dtype.
resolution = np.finfo(dtype).resolution
r = rng.random(size=coordinates.shape[0], dtype=dtype)**.5 * resolution
r = rng.random(size=coordinates.shape[0], dtype=dtype) ** 0.5 * resolution
theta = rng.random(size=coordinates.shape[0], dtype=dtype) * np.pi * 2
# converting from polar to cartesian
dx = r + np.sin(theta)
dy = r + np.cos(theta)
# then adding the displacements
coordinates = coordinates + np.column_stack((dx,dy))
coordinates = coordinates + np.column_stack((dx, dy))
geoms = geopandas.GeoSeries(geopandas.points_from_xy(*coordinates.T, crs=geoms.crs))
return coordinates, geoms


def _induce_cliques(adjtable, clique_to_members, fill_value=1):
"""
induce cliques into the input graph. This connects everything within a
Expand All @@ -48,31 +51,31 @@ def _induce_cliques(adjtable, clique_to_members, fill_value=1):

This does not guarantee/understand ordering of the *output* adjacency table.
"""
adj_across_clique = adjtable.merge(
clique_to_members['input_index'], left_index=True, right_index=True
).explode('input_index').rename(
columns=dict(input_index='subclique_focal')
).merge(
clique_to_members['input_index'], left_on='neighbor', right_index=True
).explode('input_index').rename(
columns=dict(input_index='subclique_neighbor')
).reset_index().drop(
['focal','neighbor', 'index'], axis=1
).rename(
columns=dict(subclique_focal="focal", subclique_neighbor='neighbor')
adj_across_clique = (
adjtable.merge(
clique_to_members["input_index"], left_index=True, right_index=True
)
.explode("input_index")
.rename(columns=dict(input_index="subclique_focal"))
.merge(clique_to_members["input_index"], left_on="neighbor", right_index=True)
.explode("input_index")
.rename(columns=dict(input_index="subclique_neighbor"))
.reset_index()
.drop(["focal", "neighbor", "index"], axis=1)
.rename(columns=dict(subclique_focal="focal", subclique_neighbor="neighbor"))
)
is_multimember_clique = clique_to_members["input_index"].str.len() > 1
adj_within_clique = (
clique_to_members[is_multimember_clique]["input_index"]
.apply(lambda x: list(permutations(x, 2)))
.explode()
.apply(pd.Series)
.rename(columns={0: "focal", 1: "neighbor"})
.assign(weight=fill_value)
)
is_multimember_clique = clique_to_members['input_index'].str.len()>1
adj_within_clique = clique_to_members[
is_multimember_clique
]['input_index'].apply(
lambda x: list(permutations(x, 2))
).explode().apply(
pd.Series
).rename(columns={0:"focal", 1:"neighbor"}).assign(weight=fill_value)

new_adj = pd.concat(
(adj_across_clique, adj_within_clique),
ignore_index=True, axis=0
(adj_across_clique, adj_within_clique), ignore_index=True, axis=0
).reset_index(drop=True)

return new_adj
Expand Down Expand Up @@ -104,12 +107,20 @@ def _build_coincidence_lookup(geoms):
valid_coincident_geom_types = set(("Point",))
if not set(geoms.geom_type) <= valid_coincident_geom_types:
raise ValueError(
f"coindicence checks are only well-defined for geom_types: {valid_coincident_geom_types}"
)
"coindicence checks are only well-defined for "
f"geom_types: {valid_coincident_geom_types}"
)
max_coincident = geoms.geometry.duplicated().sum()
lut = geoms.to_frame("geometry").reset_index().groupby("geometry")['index'].agg(list).reset_index()
lut = (
geoms.to_frame("geometry")
.reset_index()
.groupby("geometry")["index"]
.agg(list)
.reset_index()
)
lut = geopandas.GeoDataFrame(lut)
return max_coincident, lut.rename(columns=dict(index='input_index'))
return max_coincident, lut.rename(columns=dict(index="input_index"))


def _validate_geometry_input(geoms, ids=None, valid_geometry_types=None):
"""
Expand All @@ -134,7 +145,7 @@ def _validate_geometry_input(geoms, ids=None, valid_geometry_types=None):
valid_geometry_types = set(valid_geometry_types)
if not geom_types <= valid_geometry_types:
raise ValueError(
"this Graph type is only well-defined for "
"This Graph type is only well-defined for "
f"geom_types: {valid_geometry_types}."
)
coordinates = shapely.get_coordinates(geoms)
Expand Down Expand Up @@ -167,11 +178,12 @@ def _validate_sparse_input(sparse, ids=None):
), "coordinates should represent a distance matrix if metric='precomputed'"
return _sparse_to_arrays(sparse, ids)

def _vec_euclidean_distances(X,Y):

def _vec_euclidean_distances(X, Y):
"""
compute the euclidean distances along corresponding rows of two arrays
"""
return ((X-Y)**2).sum(axis=1)**.5
return ((X - Y) ** 2).sum(axis=1) ** 0.5


def _evaluate_index(data):
Expand Down
8 changes: 6 additions & 2 deletions libpysal/graph/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,12 @@ def from_dicts(cls, neighbors, weights=None):
def build_contiguity(cls, geometry, rook=True, by_perimeter=False, strict=False):
"""Generate Graph from geometry based on contiguity

TODO: specify the planarity constraint of the definition of queen and rook (e.g
that there could not be an overlap).
Contiguity builder assumes that all geometries are forming a coverage, i.e.
a non-overlapping mesh and neighbouring geometries share only points or segments
of their exterior boundaries. In practice, ``build_contiguity`` is capable of
creating a Graph of partially overlapping geometries when
``strict=False, by_perimeter=False``, but that would not strictly follow the
definition of queen or rook contiguity.

Parameters
----------
Expand Down
28 changes: 22 additions & 6 deletions libpysal/graph/tests/test_contiguity.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,26 @@ def test_correctness_rook_queen_distinct(pointset):
rook_ = _vertex_set_intersection(data.geometry, rook=True)
queen_ = _vertex_set_intersection(data.geometry, rook=False)

with pytest.raises(AssertionError):
assert set(zip(*rook_)) == set(zip(*queen_))
assert set(zip(*rook_)) != set(zip(*queen_))


def test_geom_type_raise():
    """Point geometries must be rejected with an informative ValueError."""
    points = geopandas.GeoSeries([shapely.Point(0, 0), shapely.Point(2, 2)])
    with pytest.raises(ValueError, match="This Graph type is only well-defined"):
        _vertex_set_intersection(points)


def test_overlap_raise():
    """Perimeter weighting over overlapping polygons must raise."""
    geoms = nybb.set_index("BoroName").geometry.copy()
    # force an overlap by merging a buffer around an interior point into
    # one of the boroughs
    blob = shapely.Point(1021176.479, 181374.797).buffer(10000)
    geoms.iloc[1] = shapely.union(geoms.iloc[1], blob)

    with pytest.raises(ValueError, match="Some geometries overlap."):
        _vertex_set_intersection(geoms, by_perimeter=True)


def test_correctness_vertex_set_contiguity_distinct():
Expand All @@ -199,15 +217,13 @@ def test_correctness_vertex_set_contiguity_distinct():

rook = _rook(data)

with pytest.raises(AssertionError):
assert set(zip(*vs_rook)) == set(zip(*rook))
assert set(zip(*vs_rook)) != set(zip(*rook))

vs_queen = _vertex_set_intersection(data, rook=False)

queen = _queen(data)

with pytest.raises(AssertionError):
assert set(zip(*vs_queen)) == set(zip(*queen))
assert set(zip(*vs_queen)) != set(zip(*queen))


@pytest.mark.parametrize(
Expand Down