diff --git a/CHANGELOG.md b/CHANGELOG.md index 081f93d2cb..783f68f2dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,13 +17,19 @@ Removed: ### Added - `Impact.impact_at_reg` method for aggregating impacts per country or custom region [#642](https://github.com/CLIMADA-project/climada_python/pull/642) +- `Impact.match_centroids` convenience method for matching (hazard) centroids to impact objects [#602](https://github.com/CLIMADA-project/climada_python/pull/602) +- `climada.util.coordinates.match_centroids` method for matching (hazard) centroids to GeoDataFrames [#602](https://github.com/CLIMADA-project/climada_python/pull/602) -### Changed +### Changed - Improved error messages from `climada.CONFIG` in case of missing configuration values [#670](https://github.com/CLIMADA-project/climada_python/pull/670) +- Refactored `Exposure.assign_centroids` using a new util function `u_coord.match_centroids` [#602](https://github.com/CLIMADA-project/climada_python/pull/602) +- Renamed `climada.util.coordinate.assign_grid_points` to `match_grid_points` and `climada.util.coordinates.assign_coordinates` to `match_coordinates` +[#602](https://github.com/CLIMADA-project/climada_python/pull/602) ### Fixed -- `util.lines_polys_handler` solve polygon disaggregation issue in metre-based projection [#666](https://github.com/CLIMADA-project/climada_python/pull/666) + +- `util.lines_polys_handler` solve polygon disaggregation issue in metre-based projection [#666](https://github.com/CLIMADA-project/climada_python/pull/666) ### Deprecated diff --git a/climada/engine/impact.py b/climada/engine/impact.py old mode 100755 new mode 100644 index dbccb48c6f..46712850f0 --- a/climada/engine/impact.py +++ b/climada/engine/impact.py @@ -1672,7 +1672,7 @@ def _selected_events_idx(self, event_ids, event_names, dates, nb_events): return sel_ev def _selected_exposures_idx(self, coord_exp): - assigned_idx = u_coord.assign_coordinates(self.coord_exp, coord_exp, threshold=0) + assigned_idx = u_coord.match_coordinates(self.coord_exp, coord_exp, threshold=0) sel_exp = (assigned_idx >= 0).nonzero()[0] if sel_exp.size == 0: LOGGER.warning("No exposure coordinates match the selection.") @@ -1783,6 +1783,38 @@ def stack_attribute(attr_name: str) -> np.ndarray: **kwargs, ) + def match_centroids(self, hazard, distance='euclidean', + threshold=u_coord.NEAREST_NEIGHBOR_THRESHOLD): + """ + Finds the closest hazard centroid for each impact coordinate. + Creates a temporary GeoDataFrame and uses ``u_coord.match_centroids()``. + See there for details and parameters + + Parameters + ---------- + hazard : Hazard + Hazard to match (with raster or vector centroids). + distance : str, optional + Distance to use in case of vector centroids. + Possible values are "euclidean", "haversine" and "approx". + Default: "euclidean" + threshold : float + If the distance (in km) to the nearest neighbor exceeds `threshold`, + the index `-1` is assigned. + Set `threshold` to 0, to disable nearest neighbor matching. + Default: 100 (km) + + Returns + ------- + np.array + array of closest Hazard centroids, aligned with the Impact's `coord_exp` array + """ + + return u_coord.match_centroids( + self._build_exp().gdf, + hazard.centroids, + distance=distance, + threshold=threshold) @dataclass class ImpactFreqCurve(): diff --git a/climada/engine/test/test_impact.py b/climada/engine/test/test_impact.py index 0ac3a470f5..c9c141a5eb 100644 --- a/climada/engine/test/test_impact.py +++ b/climada/engine/test/test_impact.py @@ -888,6 +888,20 @@ def test__exp_build_event(self): self.assertEqual(exp.value_unit, imp.unit) self.assertEqual(exp.ref_year, 0) +class TestMatchCentroids(unittest.TestCase): + + def test_match_centroids(self): + "Test that hazard centroids get assigned correctly" + exp = ENT.exposures + exp.assign_centroids(HAZ) + fake_eai_exp = np.arange(len(exp.gdf)) + fake_at_event = np.arange(HAZ.size) + fake_aai_agg = np.sum(fake_eai_exp) + imp = Impact.from_eih(exp, ENT.impact_funcs, HAZ, + fake_at_event, fake_eai_exp, fake_aai_agg) + imp_centr = imp.match_centroids(HAZ) + np.testing.assert_array_equal(imp_centr, exp.gdf.centr_TC) + class TestImpactH5IO(unittest.TestCase): """Tests for reading and writing Impact from/to H5 files""" @@ -1106,4 +1120,5 @@ def write_tag(group, tag_kwds): TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestImpact)) TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestImpactH5IO)) TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestImpactConcat)) + TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestAssignCentroids)) unittest.TextTestRunner(verbosity=2).run(TESTS) diff --git a/climada/entity/exposures/base.py b/climada/entity/exposures/base.py index 49802d247d..ef922a6cc2 100644 --- a/climada/entity/exposures/base.py +++ b/climada/entity/exposures/base.py @@ -369,6 +369,11 @@ def assign_centroids(self, hazard, distance='euclidean', threshold=u_coord.NEAREST_NEIGHBOR_THRESHOLD, overwrite=True): """Assign for each exposure coordinate closest hazard coordinate. + The Exposures ``gdf`` will be altered by this method. It will have an additional + (or modified) column named ``centr_[hazard.HAZ_TYPE]`` after the call. + + Uses the utility function ``u_coord.match_centroids``. See there for details + and parameters. The value -1 is used for distances larger than ``threshold`` in point distances. In case of raster hazards the value -1 is used for centroids outside of the raster. @@ -392,9 +397,10 @@ def assign_centroids(self, hazard, distance='euclidean', See Also -------- - climada.util.coordinates.assign_coordinates + climada.util.coordinates.match_grid_points: method to associate centroids to + exposure points when centroids is a raster + climada.util.coordinates.match_coordinates: method to associate centroids to exposure points - Notes ----- The default order of use is: @@ -422,18 +428,16 @@ def assign_centroids(self, hazard, distance='euclidean', LOGGER.info('Matching %s exposures with %s centroids.', str(self.gdf.shape[0]), str(hazard.centroids.size)) + if not u_coord.equal_crs(self.crs, hazard.centroids.crs): raise ValueError('Set hazard and exposure to same CRS first!') - if hazard.centroids.meta: - assigned = u_coord.assign_grid_points( - self.gdf.longitude.values, self.gdf.latitude.values, - hazard.centroids.meta['width'], hazard.centroids.meta['height'], - hazard.centroids.meta['transform']) - else: - assigned = u_coord.assign_coordinates( - np.stack([self.gdf.latitude.values, self.gdf.longitude.values], axis=1), - hazard.centroids.coord, distance=distance, threshold=threshold) - self.gdf[centr_haz] = assigned + # Note: equal_crs is tested here, rather than within match_centroids(), + # because exp.gdf.crs may not be defined, but exp.crs must be defined. + + assigned_centr = u_coord.match_centroids(self.gdf, hazard.centroids, + distance=distance, threshold=threshold) + self.gdf[centr_haz] = assigned_centr + def set_geometry_points(self, scheduler=None): """Set geometry attribute of GeoDataFrame with Points from latitude and diff --git a/climada/hazard/base.py b/climada/hazard/base.py index 52990a2300..7d81a1c4f3 100644 --- a/climada/hazard/base.py +++ b/climada/hazard/base.py @@ -1452,7 +1452,7 @@ def select_tight(self, buffer=NEAREST_NEIGHBOR_THRESHOLD/ONE_LAT_KM, See also -------- self.select: Method to select centroids by lat/lon extent - util.coordinates.assign_coordinates: algorithm to match centroids. + util.coordinates.match_coordinates: algorithm to match centroids. """ @@ -2320,7 +2320,7 @@ def append(self, *others): # map individual centroids objects to union centroids = Centroids.union(*[haz.centroids for haz in haz_list]) hazcent_in_cent_idx_list = [ - u_coord.assign_coordinates(haz.centroids.coord, centroids.coord, threshold=0) + u_coord.match_coordinates(haz.centroids.coord, centroids.coord, threshold=0) for haz in haz_list_nonempty ] @@ -2408,7 +2408,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD): Centroids instance on which to map the hazard. threshold: int or float Threshold (in km) for mapping haz.centroids not in centroids. - Argument is passed to climada.util.coordinates.assign_coordinates. + Argument is passed to climada.util.coordinates.match_coordinates. Default: 100 (km) Returns @@ -2423,7 +2423,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD): See Also -------- - util.coordinates.assign_coordinates: algorithm to match centroids. + util.coordinates.match_coordinates: algorithm to match centroids. """ # define empty hazard @@ -2432,7 +2432,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD): # indices for mapping matrices onto common centroids if centroids.meta: - new_cent_idx = u_coord.assign_grid_points( + new_cent_idx = u_coord.match_grid_points( self.centroids.lon, self.centroids.lat, centroids.meta['width'], centroids.meta['height'], centroids.meta['transform']) @@ -2441,7 +2441,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD): "the raster defined by centroids.meta." " Please choose a larger raster.") else: - new_cent_idx = u_coord.assign_coordinates( + new_cent_idx = u_coord.match_coordinates( self.centroids.coord, centroids.coord, threshold=threshold ) if -1 in new_cent_idx: diff --git a/climada/util/coordinates.py b/climada/util/coordinates.py index 9983a6b2c2..1680fc4150 100644 --- a/climada/util/coordinates.py +++ b/climada/util/coordinates.py @@ -924,7 +924,12 @@ def get_region_gridpoints(countries=None, regions=None, resolution=150, lat, lon = [ar.ravel() for ar in [lat, lon]] return lat, lon -def assign_grid_points(x, y, grid_width, grid_height, grid_transform): +def assign_grid_points(*args, **kwargs): + """This function has been renamed, use ``match_grid_points`` instead.""" + LOGGER.warning("This function has been renamed, use match_grid_points instead.") + return match_grid_points(*args, **kwargs) + +def match_grid_points(x, y, grid_width, grid_height, grid_transform): """To each coordinate in `x` and `y`, assign the closest centroid in the given raster grid Make sure that your grid specification is relative to the same coordinate reference system as @@ -961,7 +966,12 @@ def assign_grid_points(x, y, grid_width, grid_height, grid_transform): assigned[(y_i < 0) | (y_i >= grid_height)] = -1 return assigned -def assign_coordinates(coords, coords_to_assign, distance="euclidean", +def assign_coordinates(*args, **kwargs): + """This function has been renamed, use ``match_coordinates`` instead.""" + LOGGER.warning("This function has been renamed, use match_coordinates instead.") + return match_coordinates(*args, **kwargs) + +def match_coordinates(coords, coords_to_assign, distance="euclidean", threshold=NEAREST_NEIGHBOR_THRESHOLD, **kwargs): """To each coordinate in `coords`, assign a matching coordinate in `coords_to_assign` @@ -1057,6 +1067,71 @@ def assign_coordinates(coords, coords_to_assign, distance="euclidean", coords_to_assign, coords[not_assigned_idx_mask], threshold, **kwargs) return assigned_idx +def match_centroids(coord_gdf, centroids, distance='euclidean', + threshold=NEAREST_NEIGHBOR_THRESHOLD): + """Assign to each gdf coordinate point its closest centroids's coordinate. + If disatances > threshold in points' distances, -1 is returned. + If centroids are in a raster and coordinate point is outside of it ``-1`` is assigned + + Parameters + ---------- + coord_gdf : gpd.GeoDataFrame + GeoDataframe with defined latitude/longitude column and crs + centroids : Centroids + (Hazard) centroids to match (as raster or vector centroids). + distance : str, optional + Distance to use in case of vector centroids. + Possible values are "euclidean", "haversine" and "approx". + Default: "euclidean" + threshold : float, optional + If the distance (in km) to the nearest neighbor exceeds `threshold`, + the index `-1` is assigned. + Set `threshold` to 0, to disable nearest neighbor matching. + Default: ``NEAREST_NEIGHBOR_THRESHOLD`` (100km) + + See Also + -------- + climada.util.coordinates.match_grid_points: method to associate centroids to + coordinate points when centroids is a raster + climada.util.coordinates.match_coordinates: method to associate centroids to + coordinate points + + Notes + ----- + The default order of use is: + + 1. if centroid raster is defined, assign the closest raster point + to each gdf coordinate point. + 2. if no raster, assign centroids to the nearest neighbor using + euclidian metric + + Both cases can introduce innacuracies for coordinates in lat/lon + coordinates as distances in degrees differ from distances in meters + on the Earth surface, in particular for higher latitude and distances + larger than 100km. If more accuracy is needed, please use 'haversine' + distance metric. This however is slower for (quasi-)gridded data, + and works only for non-gridded data. + """ + + try: + if not equal_crs(coord_gdf.crs, centroids.crs): + raise ValueError('Set hazard and GeoDataFrame to same CRS first!') + except AttributeError: + # If the coord_gdf has no crs defined (or no valid geometry column), + # no error is raised and it is assumed that the user set the crs correctly + pass + + if centroids.meta: + assigned = match_grid_points( + coord_gdf.longitude.values, coord_gdf.latitude.values, + centroids.meta['width'], centroids.meta['height'], + centroids.meta['transform']) + else: + assigned = match_coordinates( + np.stack([coord_gdf.latitude.values, coord_gdf.longitude.values], axis=1), + centroids.coord, distance=distance, threshold=threshold) + return assigned + @numba.njit def _dist_sqr_approx(lats1, lons1, cos_lats1, lats2, lons2): """Compute squared equirectangular approximation distance. Values need diff --git a/climada/util/test/test_coordinates.py b/climada/util/test/test_coordinates.py index 4fb1a55617..552b6c47df 100644 --- a/climada/util/test/test_coordinates.py +++ b/climada/util/test/test_coordinates.py @@ -23,6 +23,7 @@ from pathlib import Path from cartopy.io import shapereader +import pandas as pd import geopandas as gpd import numpy as np from pyproj.crs import CRS as PCRS @@ -36,6 +37,7 @@ from climada import CONFIG from climada.util.constants import HAZ_DEMO_FL, DEF_CRS, ONE_LAT_KM, DEMO_DIR +from climada.hazard.base import Centroids import climada.util.coordinates as u_coord DATA_DIR = CONFIG.util.test_data.dir() @@ -493,28 +495,124 @@ def test_country_to_iso(self): class TestAssign(unittest.TestCase): """Test coordinate assignment functions""" - def test_assign_grid_points(self): - """Test assign_grid_points function""" + def test_match_grid_points(self): + """Test match_grid_points function""" res = (1, -0.5) pt_bounds = (5, 40, 10, 50) height, width, transform = u_coord.pts_to_raster_meta(pt_bounds, res) xy = np.array([[5, 50], [6, 50], [5, 49.5], [10, 40]]) - out = u_coord.assign_grid_points(xy[:, 0], xy[:, 1], width, height, transform) + out = u_coord.match_grid_points(xy[:, 0], xy[:, 1], width, height, transform) np.testing.assert_array_equal(out, [0, 1, 6, 125]) res = (0.5, -0.5) height, width, transform = u_coord.pts_to_raster_meta(pt_bounds, res) xy = np.array([[5.1, 49.95], [5.99, 50.05], [5, 49.6], [10.1, 39.8]]) - out = u_coord.assign_grid_points(xy[:, 0], xy[:, 1], width, height, transform) + out = u_coord.match_grid_points(xy[:, 0], xy[:, 1], width, height, transform) np.testing.assert_array_equal(out, [0, 2, 11, 230]) res = (1, -1) pt_bounds = (-20, -40, -10, -30) height, width, transform = u_coord.pts_to_raster_meta(pt_bounds, res) xy = np.array([[-10, -40], [-30, -40]]) - out = u_coord.assign_grid_points(xy[:, 0], xy[:, 1], width, height, transform) + out = u_coord.match_grid_points(xy[:, 0], xy[:, 1], width, height, transform) np.testing.assert_array_equal(out, [120, -1]) + def test_match_centroids(self): + """Test match_centroids function.""" + #Test 1: Raster data + meta = { + 'count': 1, 'crs': DEF_CRS, + 'width': 20, 'height': 10, + 'transform': rasterio.Affine(1.5, 0.0, -20, 0.0, -1.4, 8) + } + centroids = Centroids(meta=meta) + df = pd.DataFrame({ + 'longitude': np.array([ + -20.1, -20.0, -19.8, -19.0, -18.6, -18.4, + -19.0, -19.0, -19.0, -19.0, + -20.1, 0.0, 10.1, 10.1, 10.1, 0.0, -20.2, -20.3, + -6.4, 9.8, 0.0, + ]), + 'latitude': np.array([ + 7.3, 7.3, 7.3, 7.3, 7.3, 7.3, + 8.1, 7.9, 6.7, 6.5, + 8.1, 8.2, 8.3, 0.0, -6.1, -6.2, -6.3, 0.0, + -1.9, -1.7, 0.0, + ]) + }) + gdf = gpd.GeoDataFrame(df,geometry=gpd.points_from_xy(df.longitude, df.latitude), + crs=DEF_CRS) + assigned = u_coord.match_centroids(gdf,centroids) + + expected_result = [ + # constant y-value, varying x-value + -1, 0, 0, 0, 0, 1, + # constant x-value, varying y-value + -1, 0, 0, 20, + # out of bounds: topleft, top, topright, right, bottomright, bottom, bottomleft, left + -1, -1, -1, -1, -1, -1, -1, -1, + # some explicit points within the raster + 149, 139, 113, + ] + np.testing.assert_array_equal(assigned,expected_result) + + # Test 2: Vector data (copied from test_match_coordinates) + # note that the coordinates are in lat/lon + gdf_coords = np.array([(0.2, 2), (0, 0), (0, 2), (2.1, 3), (1, 1), (-1, 1), (0, 179.9)]) + df = pd.DataFrame({ + 'longitude': gdf_coords[:, 1], + 'latitude': gdf_coords[:, 0] + }) + gdf = gpd.GeoDataFrame(df,geometry=gpd.points_from_xy(df.longitude, df.latitude), + crs=DEF_CRS) + + coords_to_assign = np.array([(2.1, 3), (0, 0), (0, 2), (0.9, 1.0), (0, -179.9)]) + centroids = Centroids( + lat=coords_to_assign[:, 0], + lon=coords_to_assign[:, 1], + geometry = gpd.GeoSeries(crs=DEF_CRS) + ) + centroids_empty = Centroids() + + expected_results = [ + # test with different thresholds (in km) + (100, [2, 1, 2, 0, 3, -1, 4]), + (20, [-1, 1, 2, 0, 3, -1, -1]), + (0, [-1, 1, 2, 0, -1, -1, -1]), + ] + + for distance in ["euclidean", "haversine", "approx"]: + for thresh, result in expected_results: + assigned = u_coord.match_centroids( + gdf, centroids, distance=distance, threshold=thresh) + np.testing.assert_array_equal(assigned, result) + + #test empty centroids + result = [-1, -1, -1, -1, -1, -1, -1] + assigned_idx = u_coord.match_centroids( + gdf, centroids_empty, distance=distance, threshold=thresh) + np.testing.assert_array_equal(assigned_idx, result) + + # Test 3: non matching crs + df = pd.DataFrame({ + 'longitude': [10, 20, 30], + 'latitude': [50, 60, 70] + }) + gdf = gpd.GeoDataFrame(df,geometry=gpd.points_from_xy(df.longitude, df.latitude), + crs = 'EPSG:4326') + + coords_to_assign = np.array([(2.1, 3), (0, 0), (0, 2), (0.9, 1.0), (0, -179.9)]) + centroids = Centroids( + lat=[1100000,1200000], + lon=[2500000,2600000], + geometry = gpd.GeoSeries(crs='EPSG:2056') + ) + + with self.assertRaises(ValueError) as cm: + u_coord.match_centroids(gdf, centroids) + self.assertIn('Set hazard and GeoDataFrame to same CRS first!', + str(cm.exception)) + def test_dist_sqr_approx_pass(self): """Test approximate distance helper function.""" lats1 = 45.5 @@ -529,7 +627,7 @@ def test_dist_sqr_approx_pass(self): def test_wrong_distance_fail(self): """Check exception is thrown when wrong distance is given""" with self.assertRaises(ValueError) as cm: - u_coord.assign_coordinates(np.ones((10, 2)), np.ones((7, 2)), distance='distance') + u_coord.match_coordinates(np.ones((10, 2)), np.ones((7, 2)), distance='distance') self.assertIn('Coordinate assignment with "distance" distance is not supported.', str(cm.exception)) @@ -652,7 +750,7 @@ def normal_pass(self, dist): centroids, exposures = self.data_input_values() # Interpolate with default threshold - neighbors = u_coord.assign_coordinates(exposures, centroids, distance=dist) + neighbors = u_coord.match_coordinates(exposures, centroids, distance=dist) # Reference output ref_neighbors = self.data_ref() # Check results @@ -668,7 +766,7 @@ def normal_warning(self, dist): # Interpolate with lower threshold to raise warnings threshold = 40 with self.assertLogs('climada.util.coordinates', level='INFO') as cm: - neighbors = u_coord.assign_coordinates( + neighbors = u_coord.match_coordinates( exposures, centroids, distance=dist, threshold=threshold) self.assertIn("Distance to closest centroid", cm.output[1]) @@ -685,7 +783,7 @@ def repeat_coord_pass(self, dist): exposures[2, :] = exposures[0, :] # Interpolate with default threshold - neighbors = u_coord.assign_coordinates(exposures, centroids, distance=dist) + neighbors = u_coord.match_coordinates(exposures, centroids, distance=dist) # Check output neighbors have same size as coordinates self.assertEqual(len(neighbors), exposures.shape[0]) @@ -700,7 +798,7 @@ def antimeridian_warning(self, dist): # Interpolate with lower threshold to raise warnings threshold = 100 with self.assertLogs('climada.util.coordinates', level='INFO') as cm: - neighbors = u_coord.assign_coordinates( + neighbors = u_coord.match_coordinates( exposures, centroids, distance=dist, threshold=threshold) self.assertIn("Distance to closest centroid", cm.output[1]) @@ -786,12 +884,12 @@ def test_diff_outcomes(self): ] for dist, ref, kwargs in zip(dist_list, ref_neighbors, kwargs_list): - neighbors = u_coord.assign_coordinates( + neighbors = u_coord.match_coordinates( exposures, centroids, distance=dist, threshold=threshold, **kwargs) np.testing.assert_array_equal(neighbors, ref) - def test_assign_coordinates(self): - """Test assign_coordinates function""" + def test_match_coordinates(self): + """Test match_coordinates function""" # note that the coordinates are in lat/lon coords = np.array([(0.2, 2), (0, 0), (0, 2), (2.1, 3), (1, 1), (-1, 1), (0, 179.9)]) coords_to_assign = np.array([(2.1, 3), (0, 0), (0, 2), (0.9, 1.0), (0, -179.9)]) @@ -808,7 +906,7 @@ def test_assign_coordinates(self): coords_typed = coords.astype(test_dtype) coords_to_assign_typed = coords_to_assign.astype(test_dtype) for thresh, result in expected_results: - assigned_idx = u_coord.assign_coordinates( + assigned_idx = u_coord.match_coordinates( coords_typed, coords_to_assign_typed, distance=distance, threshold=thresh) np.testing.assert_array_equal(assigned_idx, result) @@ -816,14 +914,14 @@ def test_assign_coordinates(self): #test empty coords_to_assign coords_to_assign_empty = np.array([]) result = [-1, -1, -1, -1, -1, -1, -1] - assigned_idx = u_coord.assign_coordinates( + assigned_idx = u_coord.match_coordinates( coords, coords_to_assign_empty, distance=distance, threshold=thresh) np.testing.assert_array_equal(assigned_idx, result) #test empty coords coords_empty = np.array([]) result = np.array([]) - assigned_idx = u_coord.assign_coordinates( + assigned_idx = u_coord.match_coordinates( coords_empty, coords_to_assign, distance=distance, threshold=thresh) np.testing.assert_array_equal(assigned_idx, result)