diff --git a/libpysal/graph/_contiguity.py b/libpysal/graph/_contiguity.py
index 622942790..df2332bf2 100644
--- a/libpysal/graph/_contiguity.py
+++ b/libpysal/graph/_contiguity.py
@@ -44,29 +44,19 @@ def _vertex_set_intersection(geoms, rook=True, ids=None, by_perimeter=False):
         graph[idx] = set([idx])
 
     # get all of the vertices for the input
-    assert (
-        ~geoms.geom_type.str.endswith("Point")
-    ).any(), "this graph type is only well-defined for line and polygon geometries."
-
-    ## TODO: this induces a "fake" edge between the closing and opening point
-    ## of two multipolygon parts. This should never enter into calculations,
-    ## *unless* two multipolygons share opening and closing points in the
-    ## same order and part order. Still, this should be fixed by ensuring that
-    ## only adjacent points of the same part of the same polygon are used.
-    ## this bug also exists in the existing contiguity builder.
-
-    # geoms = geoms.explode()
-    # multipolygon_ixs = geoms.get_level_values(0)
-    # ids = ids[multipolygon_ixs]
-    # geoms = geoms.geometry
     vertices, offsets = shapely.get_coordinates(geoms.geometry, return_index=True)
+    # use offsets from exploded geoms to create edges to avoid a phantom edge
+    # between parts of a multipolygon
+    _, single_part_offsets = shapely.get_coordinates(
+        geoms.geometry.explode(ignore_index=True), return_index=True
+    )
     # initialise the hashmap we want to invert
     vert_to_geom = defaultdict(set)
 
     # populate the hashmap we intend to invert
     if rook:
         for i, vertex in enumerate(vertices[:-1]):
-            if offsets[i] != offsets[i + 1]:
+            if single_part_offsets[i] != single_part_offsets[i + 1]:
                 continue
             edge = tuple(sorted([tuple(vertex), tuple(vertices[i + 1])]))
             # edge to {polygons, with, this, edge}
@@ -143,6 +133,35 @@ def _queen(geoms, ids=None, by_perimeter=False):
 
 
 def _rook(geoms, ids=None, by_perimeter=False):
+    """
+    Construct rook contiguity using point-set relations.
+
+    Rook contiguity occurs when two polygons touch over at least one edge.
+    Overlapping polygons will not be considered as neighboring
+    under this rule, since contiguity is strictly planar.
+
+    Parameters
+    ----------
+    geoms : geopandas.GeoDataFrame, geopandas.GeoSeries, numpy.array
+        The container for the geometries to compute contiguity. Regardless of
+        the containing type, the geometries within the container must be Polygons
+        or MultiPolygons.
+    ids : numpy.ndarray (default: None)
+        names to use for indexing the graph constructed from geoms. If None (default),
+        an index is extracted from `geoms`. If `geoms` has no index, a pandas.RangeIndex
+        is constructed.
+    by_perimeter : bool (default: False)
+        whether to compute perimeter-weighted contiguity. By default, this returns
+        the raw length of perimeter overlap between contiguous polygons or lines.
+        In the case of LineString/MultiLineString input geoms, this is likely to
+        result in empty weights, where all observations are isolates.
+
+    Returns
+    -------
+    (heads, tails, weights) : three vectors describing the links in the
+        rook contiguity graph, with islands represented as a self-loop with
+        zero weight.
+ """ _, ids, geoms = _validate_geometry_input( geoms, ids=ids, valid_geometry_types=_VALID_GEOMETRY_TYPES ) @@ -160,13 +179,6 @@ def _rook(geoms, ids=None, by_perimeter=False): return _resolve_islands(heads, tails, ids, weights) -_rook.__doc__ = ( - _queen.__doc__.replace("queen", "rook") - .replace("Queen", "Rook") - .replace("exactly at a point", "over at least one edge") -) - - def _perimeter_weights(geoms, heads, tails): """ Compute the perimeter of neighbor pairs for edges describing a contiguity graph. @@ -180,7 +192,7 @@ def _perimeter_weights(geoms, heads, tails): on input data are expected. """ intersection = shapely.intersection(geoms[heads].values, geoms[tails].values) - geom_types = shapely.get_type_id(intersection) + geom_types = shapely.get_type_id(shapely.get_parts(intersection)) # check if the intersection resulted in (Multi)Polygon if numpy.isin(geom_types, [3, 6]).any(): diff --git a/libpysal/graph/_utils.py b/libpysal/graph/_utils.py index befcdc972..e4984e3c6 100644 --- a/libpysal/graph/_utils.py +++ b/libpysal/graph/_utils.py @@ -4,6 +4,7 @@ import shapely from itertools import permutations + def _sparse_to_arrays(sparray, ids=None): if ids is None: maxdim = np.maximum(*sparray.shape) @@ -11,6 +12,7 @@ def _sparse_to_arrays(sparray, ids=None): head_ix, tail_ix = sparray.nonzero() return ids[head_ix], ids[tail_ix], sparray.data + def _jitter_geoms(coordinates, geoms, seed=None): """ Jitter geometries based on the smallest required movements to induce @@ -30,16 +32,17 @@ def _jitter_geoms(coordinates, geoms, seed=None): # the resolution is the approximate difference between two floats # that can be resolved at the given dtype. resolution = np.finfo(dtype).resolution - r = rng.random(size=coordinates.shape[0], dtype=dtype)**.5 * resolution + r = rng.random(size=coordinates.shape[0], dtype=dtype) ** 0.5 * resolution theta = rng.random(size=coordinates.shape[0], dtype=dtype) * np.pi * 2 # converting from polar to cartesian dx = r + np.sin(theta) dy = r + np.cos(theta) # then adding the displacements - coordinates = coordinates + np.column_stack((dx,dy)) + coordinates = coordinates + np.column_stack((dx, dy)) geoms = geopandas.GeoSeries(geopandas.points_from_xy(*coordinates.T, crs=geoms.crs)) return coordinates, geoms + def _induce_cliques(adjtable, clique_to_members, fill_value=1): """ induce cliques into the input graph. This connects everything within a @@ -48,31 +51,31 @@ def _induce_cliques(adjtable, clique_to_members, fill_value=1): This does not guarantee/understand ordering of the *output* adjacency table. 
""" - adj_across_clique = adjtable.merge( - clique_to_members['input_index'], left_index=True, right_index=True - ).explode('input_index').rename( - columns=dict(input_index='subclique_focal') - ).merge( - clique_to_members['input_index'], left_on='neighbor', right_index=True - ).explode('input_index').rename( - columns=dict(input_index='subclique_neighbor') - ).reset_index().drop( - ['focal','neighbor', 'index'], axis=1 - ).rename( - columns=dict(subclique_focal="focal", subclique_neighbor='neighbor') + adj_across_clique = ( + adjtable.merge( + clique_to_members["input_index"], left_index=True, right_index=True + ) + .explode("input_index") + .rename(columns=dict(input_index="subclique_focal")) + .merge(clique_to_members["input_index"], left_on="neighbor", right_index=True) + .explode("input_index") + .rename(columns=dict(input_index="subclique_neighbor")) + .reset_index() + .drop(["focal", "neighbor", "index"], axis=1) + .rename(columns=dict(subclique_focal="focal", subclique_neighbor="neighbor")) + ) + is_multimember_clique = clique_to_members["input_index"].str.len() > 1 + adj_within_clique = ( + clique_to_members[is_multimember_clique]["input_index"] + .apply(lambda x: list(permutations(x, 2))) + .explode() + .apply(pd.Series) + .rename(columns={0: "focal", 1: "neighbor"}) + .assign(weight=fill_value) ) - is_multimember_clique = clique_to_members['input_index'].str.len()>1 - adj_within_clique = clique_to_members[ - is_multimember_clique - ]['input_index'].apply( - lambda x: list(permutations(x, 2)) - ).explode().apply( - pd.Series - ).rename(columns={0:"focal", 1:"neighbor"}).assign(weight=fill_value) new_adj = pd.concat( - (adj_across_clique, adj_within_clique), - ignore_index=True, axis=0 + (adj_across_clique, adj_within_clique), ignore_index=True, axis=0 ).reset_index(drop=True) return new_adj @@ -104,12 +107,20 @@ def _build_coincidence_lookup(geoms): valid_coincident_geom_types = set(("Point",)) if not set(geoms.geom_type) <= valid_coincident_geom_types: raise ValueError( - f"coindicence checks are only well-defined for geom_types: {valid_coincident_geom_types}" - ) + "coindicence checks are only well-defined for " + f"geom_types: {valid_coincident_geom_types}" + ) max_coincident = geoms.geometry.duplicated().sum() - lut = geoms.to_frame("geometry").reset_index().groupby("geometry")['index'].agg(list).reset_index() + lut = ( + geoms.to_frame("geometry") + .reset_index() + .groupby("geometry")["index"] + .agg(list) + .reset_index() + ) lut = geopandas.GeoDataFrame(lut) - return max_coincident, lut.rename(columns=dict(index='input_index')) + return max_coincident, lut.rename(columns=dict(index="input_index")) + def _validate_geometry_input(geoms, ids=None, valid_geometry_types=None): """ @@ -134,7 +145,7 @@ def _validate_geometry_input(geoms, ids=None, valid_geometry_types=None): valid_geometry_types = set(valid_geometry_types) if not geom_types <= valid_geometry_types: raise ValueError( - "this Graph type is only well-defined for " + "This Graph type is only well-defined for " f"geom_types: {valid_geometry_types}." 
            )
     coordinates = shapely.get_coordinates(geoms)
@@ -167,11 +178,12 @@ def _validate_sparse_input(sparse, ids=None):
     ), "coordinates should represent a distance matrix if metric='precomputed'"
     return _sparse_to_arrays(sparse, ids)
 
-def _vec_euclidean_distances(X,Y):
+
+def _vec_euclidean_distances(X, Y):
     """
     compute the euclidean distances along corresponding rows of two arrays
     """
-    return ((X-Y)**2).sum(axis=1)**.5
+    return ((X - Y) ** 2).sum(axis=1) ** 0.5
 
 
 def _evaluate_index(data):
diff --git a/libpysal/graph/base.py b/libpysal/graph/base.py
index 6e37a31b5..7924f515f 100644
--- a/libpysal/graph/base.py
+++ b/libpysal/graph/base.py
@@ -342,8 +342,12 @@ def from_dicts(cls, neighbors, weights=None):
     def build_contiguity(cls, geometry, rook=True, by_perimeter=False, strict=False):
         """Generate Graph from geometry based on contiguity
 
-        TODO: specify the planarity constraint of the defitnion of queen and rook (e.g
-        that there could not be an overlap).
+        The contiguity builder assumes that all geometries form a coverage, i.e.
+        a non-overlapping mesh in which neighbouring geometries share only points
+        or segments of their exterior boundaries. In practice, ``build_contiguity``
+        can create a Graph from partially overlapping geometries when
+        ``strict=False, by_perimeter=False``, but that would not strictly follow
+        the definition of queen or rook contiguity.
 
         Parameters
         ----------
diff --git a/libpysal/graph/tests/test_contiguity.py b/libpysal/graph/tests/test_contiguity.py
index cb62e6384..3d00f22b6 100644
--- a/libpysal/graph/tests/test_contiguity.py
+++ b/libpysal/graph/tests/test_contiguity.py
@@ -183,8 +183,26 @@ def test_correctness_rook_queen_distinct(pointset):
     rook_ = _vertex_set_intersection(data.geometry, rook=True)
     queen_ = _vertex_set_intersection(data.geometry, rook=False)
 
-    with pytest.raises(AssertionError):
-        assert set(zip(*rook_)) == set(zip(*queen_))
+    assert set(zip(*rook_)) != set(zip(*queen_))
+
+
+def test_geom_type_raise():
+    """
+    Check the error for point geoms
+    """
+    data = geopandas.GeoSeries((shapely.Point(0, 0), shapely.Point(2, 2)))
+    with pytest.raises(ValueError, match="This Graph type is only well-defined"):
+        _vertex_set_intersection(data)
+
+
+def test_overlap_raise():
+    data = nybb.set_index("BoroName").geometry.copy()
+    data.iloc[1] = shapely.union(
+        data.iloc[1], shapely.Point(1021176.479, 181374.797).buffer(10000)
+    )
+
+    with pytest.raises(ValueError, match="Some geometries overlap."):
+        _vertex_set_intersection(data, by_perimeter=True)
 
 
 def test_correctness_vertex_set_contiguity_distinct():
@@ -199,15 +217,13 @@
 
     rook = _rook(data)
 
-    with pytest.raises(AssertionError):
-        assert set(zip(*vs_rook)) == set(zip(*rook))
+    assert set(zip(*vs_rook)) != set(zip(*rook))
 
     vs_queen = _vertex_set_intersection(data, rook=False)
 
     queen = _queen(data)
 
-    with pytest.raises(AssertionError):
-        assert set(zip(*vs_queen)) == set(zip(*queen))
+    assert set(zip(*vs_queen)) != set(zip(*queen))
 
 
 @pytest.mark.parametrize(
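
Reviewer note: as a quick, hedged illustration of the behaviour the new tests exercise (not part of the diff above), the sketch below builds rook and queen contiguity for a 2x2 grid of unit squares, where the two relations should differ because diagonal cells share only a corner. It assumes the in-development libpysal.graph API, namely that Graph is importable from libpysal.graph and that the private _vertex_set_intersection builder lives in libpysal.graph._contiguity; the (heads, tails, weights) comparison mirrors test_correctness_rook_queen_distinct.

import geopandas
import shapely

from libpysal.graph import Graph
from libpysal.graph._contiguity import _vertex_set_intersection

# 2x2 grid of unit squares: diagonal cells meet only at a corner, so queen
# contiguity links them while rook contiguity does not
cells = geopandas.GeoSeries(
    [shapely.box(x, y, x + 1, y + 1) for x in (0, 1) for y in (0, 1)]
)

# the private builder returns raw (heads, tails, weights) arrays, so the
# comparison follows the same pattern as the tests in the diff
rook_ = _vertex_set_intersection(cells, rook=True)
queen_ = _vertex_set_intersection(cells, rook=False)
assert set(zip(*rook_)) != set(zip(*queen_))

# the public classmethod whose docstring is edited in base.py above
rook_graph = Graph.build_contiguity(cells, rook=True)
queen_graph = Graph.build_contiguity(cells, rook=False)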