Skip to content

Commit

Permalink
Merge pull request #602 from CLIMADA-project/feature/assign_centroids…
Browse files Browse the repository at this point in the history
…_impact

refactor exp.assign_centroids and added imp.match_centroids
  • Loading branch information
timschmi95 authored Mar 23, 2023
2 parents f627a9e + 48eb26d commit 1e2c99d
Show file tree
Hide file tree
Showing 7 changed files with 269 additions and 39 deletions.
10 changes: 8 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,19 @@ Removed:
### Added

- `Impact.impact_at_reg` method for aggregating impacts per country or custom region [#642](https://github.com/CLIMADA-project/climada_python/pull/642)
- `Impact.match_centroids` convenience method for matching (hazard) centroids to impact objects [#602](https://github.com/CLIMADA-project/climada_python/pull/602)
- `climada.util.coordinates.match_centroids` method for matching (hazard) centroids to GeoDataFrames [#602](https://github.com/CLIMADA-project/climada_python/pull/602)

### Changed
### Changed

- Improved error messages from `climada.CONFIG` in case of missing configuration values [#670](https://github.com/CLIMADA-project/climada_python/pull/670)
- Refactored `Exposure.assign_centroids` using a new util function `u_coord.match_centroids` [#602](https://github.com/CLIMADA-project/climada_python/pull/602)
- Renamed `climada.util.coordinate.assign_grid_points` to `match_grid_points` and `climada.util.coordinates.assign_coordinates` to `match_coordinates`
[#602](https://github.com/CLIMADA-project/climada_python/pull/602)

### Fixed
- `util.lines_polys_handler` solve polygon disaggregation issue in metre-based projection [#666](https://github.com/CLIMADA-project/climada_python/pull/666)

- `util.lines_polys_handler` solve polygon disaggregation issue in metre-based projection [#666](https://github.com/CLIMADA-project/climada_python/pull/666)

### Deprecated

Expand Down
34 changes: 33 additions & 1 deletion climada/engine/impact.py
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -1672,7 +1672,7 @@ def _selected_events_idx(self, event_ids, event_names, dates, nb_events):
return sel_ev

def _selected_exposures_idx(self, coord_exp):
assigned_idx = u_coord.assign_coordinates(self.coord_exp, coord_exp, threshold=0)
assigned_idx = u_coord.match_coordinates(self.coord_exp, coord_exp, threshold=0)
sel_exp = (assigned_idx >= 0).nonzero()[0]
if sel_exp.size == 0:
LOGGER.warning("No exposure coordinates match the selection.")
Expand Down Expand Up @@ -1783,6 +1783,38 @@ def stack_attribute(attr_name: str) -> np.ndarray:
**kwargs,
)

def match_centroids(self, hazard, distance='euclidean',
threshold=u_coord.NEAREST_NEIGHBOR_THRESHOLD):
"""
Finds the closest hazard centroid for each impact coordinate.
Creates a temporary GeoDataFrame and uses ``u_coord.match_centroids()``.
See there for details and parameters
Parameters
----------
hazard : Hazard
Hazard to match (with raster or vector centroids).
distance : str, optional
Distance to use in case of vector centroids.
Possible values are "euclidean", "haversine" and "approx".
Default: "euclidean"
threshold : float
If the distance (in km) to the nearest neighbor exceeds `threshold`,
the index `-1` is assigned.
Set `threshold` to 0, to disable nearest neighbor matching.
Default: 100 (km)
Returns
-------
np.array
array of closest Hazard centroids, aligned with the Impact's `coord_exp` array
"""

return u_coord.match_centroids(
self._build_exp().gdf,
hazard.centroids,
distance=distance,
threshold=threshold)

@dataclass
class ImpactFreqCurve():
Expand Down
15 changes: 15 additions & 0 deletions climada/engine/test/test_impact.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,20 @@ def test__exp_build_event(self):
self.assertEqual(exp.value_unit, imp.unit)
self.assertEqual(exp.ref_year, 0)

class TestMatchCentroids(unittest.TestCase):

def test_match_centroids(self):
"Test that hazard centroids get assigned correctly"
exp = ENT.exposures
exp.assign_centroids(HAZ)
fake_eai_exp = np.arange(len(exp.gdf))
fake_at_event = np.arange(HAZ.size)
fake_aai_agg = np.sum(fake_eai_exp)
imp = Impact.from_eih(exp, ENT.impact_funcs, HAZ,
fake_at_event, fake_eai_exp, fake_aai_agg)
imp_centr = imp.match_centroids(HAZ)
np.testing.assert_array_equal(imp_centr, exp.gdf.centr_TC)


class TestImpactH5IO(unittest.TestCase):
"""Tests for reading and writing Impact from/to H5 files"""
Expand Down Expand Up @@ -1106,4 +1120,5 @@ def write_tag(group, tag_kwds):
TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestImpact))
TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestImpactH5IO))
TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestImpactConcat))
TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestAssignCentroids))
unittest.TextTestRunner(verbosity=2).run(TESTS)
28 changes: 16 additions & 12 deletions climada/entity/exposures/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,11 @@ def assign_centroids(self, hazard, distance='euclidean',
threshold=u_coord.NEAREST_NEIGHBOR_THRESHOLD,
overwrite=True):
"""Assign for each exposure coordinate closest hazard coordinate.
The Exposures ``gdf`` will be altered by this method. It will have an additional
(or modified) column named ``centr_[hazard.HAZ_TYPE]`` after the call.
Uses the utility function ``u_coord.match_centroids``. See there for details
and parameters.
The value -1 is used for distances larger than ``threshold`` in point distances.
In case of raster hazards the value -1 is used for centroids outside of the raster.
Expand All @@ -392,9 +397,10 @@ def assign_centroids(self, hazard, distance='euclidean',
See Also
--------
climada.util.coordinates.assign_coordinates
climada.util.coordinates.match_grid_points: method to associate centroids to
exposure points when centroids is a raster
climada.util.coordinates.match_coordinates:
method to associate centroids to exposure points
Notes
-----
The default order of use is:
Expand Down Expand Up @@ -422,18 +428,16 @@ def assign_centroids(self, hazard, distance='euclidean',

LOGGER.info('Matching %s exposures with %s centroids.',
str(self.gdf.shape[0]), str(hazard.centroids.size))

if not u_coord.equal_crs(self.crs, hazard.centroids.crs):
raise ValueError('Set hazard and exposure to same CRS first!')
if hazard.centroids.meta:
assigned = u_coord.assign_grid_points(
self.gdf.longitude.values, self.gdf.latitude.values,
hazard.centroids.meta['width'], hazard.centroids.meta['height'],
hazard.centroids.meta['transform'])
else:
assigned = u_coord.assign_coordinates(
np.stack([self.gdf.latitude.values, self.gdf.longitude.values], axis=1),
hazard.centroids.coord, distance=distance, threshold=threshold)
self.gdf[centr_haz] = assigned
# Note: equal_crs is tested here, rather than within match_centroids(),
# because exp.gdf.crs may not be defined, but exp.crs must be defined.

assigned_centr = u_coord.match_centroids(self.gdf, hazard.centroids,
distance=distance, threshold=threshold)
self.gdf[centr_haz] = assigned_centr


def set_geometry_points(self, scheduler=None):
"""Set geometry attribute of GeoDataFrame with Points from latitude and
Expand Down
12 changes: 6 additions & 6 deletions climada/hazard/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,7 @@ def select_tight(self, buffer=NEAREST_NEIGHBOR_THRESHOLD/ONE_LAT_KM,
See also
--------
self.select: Method to select centroids by lat/lon extent
util.coordinates.assign_coordinates: algorithm to match centroids.
util.coordinates.match_coordinates: algorithm to match centroids.
"""

Expand Down Expand Up @@ -2320,7 +2320,7 @@ def append(self, *others):
# map individual centroids objects to union
centroids = Centroids.union(*[haz.centroids for haz in haz_list])
hazcent_in_cent_idx_list = [
u_coord.assign_coordinates(haz.centroids.coord, centroids.coord, threshold=0)
u_coord.match_coordinates(haz.centroids.coord, centroids.coord, threshold=0)
for haz in haz_list_nonempty
]

Expand Down Expand Up @@ -2408,7 +2408,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD):
Centroids instance on which to map the hazard.
threshold: int or float
Threshold (in km) for mapping haz.centroids not in centroids.
Argument is passed to climada.util.coordinates.assign_coordinates.
Argument is passed to climada.util.coordinates.match_coordinates.
Default: 100 (km)
Returns
Expand All @@ -2423,7 +2423,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD):
See Also
--------
util.coordinates.assign_coordinates: algorithm to match centroids.
util.coordinates.match_coordinates: algorithm to match centroids.
"""
# define empty hazard
Expand All @@ -2432,7 +2432,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD):

# indices for mapping matrices onto common centroids
if centroids.meta:
new_cent_idx = u_coord.assign_grid_points(
new_cent_idx = u_coord.match_grid_points(
self.centroids.lon, self.centroids.lat,
centroids.meta['width'], centroids.meta['height'],
centroids.meta['transform'])
Expand All @@ -2441,7 +2441,7 @@ def change_centroids(self, centroids, threshold=NEAREST_NEIGHBOR_THRESHOLD):
"the raster defined by centroids.meta."
" Please choose a larger raster.")
else:
new_cent_idx = u_coord.assign_coordinates(
new_cent_idx = u_coord.match_coordinates(
self.centroids.coord, centroids.coord, threshold=threshold
)
if -1 in new_cent_idx:
Expand Down
79 changes: 77 additions & 2 deletions climada/util/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,12 @@ def get_region_gridpoints(countries=None, regions=None, resolution=150,
lat, lon = [ar.ravel() for ar in [lat, lon]]
return lat, lon

def assign_grid_points(x, y, grid_width, grid_height, grid_transform):
def assign_grid_points(*args, **kwargs):
"""This function has been renamed, use ``match_grid_points`` instead."""
LOGGER.warning("This function has been renamed, use match_grid_points instead.")
return match_grid_points(*args, **kwargs)

def match_grid_points(x, y, grid_width, grid_height, grid_transform):
"""To each coordinate in `x` and `y`, assign the closest centroid in the given raster grid
Make sure that your grid specification is relative to the same coordinate reference system as
Expand Down Expand Up @@ -961,7 +966,12 @@ def assign_grid_points(x, y, grid_width, grid_height, grid_transform):
assigned[(y_i < 0) | (y_i >= grid_height)] = -1
return assigned

def assign_coordinates(coords, coords_to_assign, distance="euclidean",
def assign_coordinates(*args, **kwargs):
"""This function has been renamed, use ``match_coordinates`` instead."""
LOGGER.warning("This function has been renamed, use match_coordinates instead.")
return match_coordinates(*args, **kwargs)

def match_coordinates(coords, coords_to_assign, distance="euclidean",
threshold=NEAREST_NEIGHBOR_THRESHOLD, **kwargs):
"""To each coordinate in `coords`, assign a matching coordinate in `coords_to_assign`
Expand Down Expand Up @@ -1057,6 +1067,71 @@ def assign_coordinates(coords, coords_to_assign, distance="euclidean",
coords_to_assign, coords[not_assigned_idx_mask], threshold, **kwargs)
return assigned_idx

def match_centroids(coord_gdf, centroids, distance='euclidean',
threshold=NEAREST_NEIGHBOR_THRESHOLD):
"""Assign to each gdf coordinate point its closest centroids's coordinate.
If disatances > threshold in points' distances, -1 is returned.
If centroids are in a raster and coordinate point is outside of it ``-1`` is assigned
Parameters
----------
coord_gdf : gpd.GeoDataFrame
GeoDataframe with defined latitude/longitude column and crs
centroids : Centroids
(Hazard) centroids to match (as raster or vector centroids).
distance : str, optional
Distance to use in case of vector centroids.
Possible values are "euclidean", "haversine" and "approx".
Default: "euclidean"
threshold : float, optional
If the distance (in km) to the nearest neighbor exceeds `threshold`,
the index `-1` is assigned.
Set `threshold` to 0, to disable nearest neighbor matching.
Default: ``NEAREST_NEIGHBOR_THRESHOLD`` (100km)
See Also
--------
climada.util.coordinates.match_grid_points: method to associate centroids to
coordinate points when centroids is a raster
climada.util.coordinates.match_coordinates: method to associate centroids to
coordinate points
Notes
-----
The default order of use is:
1. if centroid raster is defined, assign the closest raster point
to each gdf coordinate point.
2. if no raster, assign centroids to the nearest neighbor using
euclidian metric
Both cases can introduce innacuracies for coordinates in lat/lon
coordinates as distances in degrees differ from distances in meters
on the Earth surface, in particular for higher latitude and distances
larger than 100km. If more accuracy is needed, please use 'haversine'
distance metric. This however is slower for (quasi-)gridded data,
and works only for non-gridded data.
"""

try:
if not equal_crs(coord_gdf.crs, centroids.crs):
raise ValueError('Set hazard and GeoDataFrame to same CRS first!')
except AttributeError:
# If the coord_gdf has no crs defined (or no valid geometry column),
# no error is raised and it is assumed that the user set the crs correctly
pass

if centroids.meta:
assigned = match_grid_points(
coord_gdf.longitude.values, coord_gdf.latitude.values,
centroids.meta['width'], centroids.meta['height'],
centroids.meta['transform'])
else:
assigned = match_coordinates(
np.stack([coord_gdf.latitude.values, coord_gdf.longitude.values], axis=1),
centroids.coord, distance=distance, threshold=threshold)
return assigned

@numba.njit
def _dist_sqr_approx(lats1, lons1, cos_lats1, lats2, lons2):
"""Compute squared equirectangular approximation distance. Values need
Expand Down
Loading

0 comments on commit 1e2c99d

Please sign in to comment.