diff --git a/tests/data/geolife_long/trips.csv b/tests/data/geolife_long/trips.csv index e910b174..88202dac 100644 --- a/tests/data/geolife_long/trips.csv +++ b/tests/data/geolife_long/trips.csv @@ -1,15 +1,15 @@ id,user_id,started_at,finished_at,origin_staypoint_id,destination_staypoint_id,geom -0,0,2008-10-23 02:53:04+00:00,2008-10-23 03:05:00+00:00,,0.0,"MULTIPOINT (116.3184169999999966 39.9847019999999986, 116.2987250000000046 39.9840190000000035)" -1,0,2008-10-23 04:08:07+00:00,2008-10-23 04:34:37+00:00,0.0,1.0,"MULTIPOINT (116.2987250000000046 39.9840190000000035, 116.3248009999999937 39.9997290000000021)" -2,0,2008-10-23 09:42:25+00:00,2008-10-23 10:45:41+00:00,1.0,5.0,"MULTIPOINT (116.3248009999999937 39.9997290000000021, 116.3216824999999943 40.0089049999999986)" -3,0,2008-10-23 11:09:22+00:00,2008-10-23 11:10:57+00:00,5.0,6.0,"MULTIPOINT (116.3216824999999943 40.0089049999999986, 116.3209779999999967 40.0092690000000033)" -4,0,2008-10-24 02:09:59+00:00,2008-10-24 02:47:06+00:00,6.0,,"MULTIPOINT (116.3209779999999967 40.0092690000000033, 116.3211620000000011 40.0092089999999985)" -5,1,2008-10-23 05:53:05+00:00,2008-10-23 06:01:42+00:00,,10.0,"MULTIPOINT (116.3192360000000036 39.9840939999999989, 116.3275380000000041 39.9780510000000007)" -6,1,2008-10-23 10:32:53+00:00,2008-10-23 11:10:19+00:00,10.0,11.0,"MULTIPOINT (116.3275380000000041 39.9780510000000007, 116.3064135000000050 40.0138019999999983)" -7,1,2008-10-23 11:49:08+00:00,2008-10-23 11:49:48+00:00,11.0,12.0,"MULTIPOINT (116.3064135000000050 40.0138019999999983, 116.3065320000000042 40.0138200000000026)" -8,1,2008-10-23 23:43:46+00:00,2008-10-24 00:23:03+00:00,12.0,14.0,"MULTIPOINT (116.3065320000000042 40.0138200000000026, 116.3266370000000052 39.9779920000000004)" -9,1,2008-10-24 01:45:41+00:00,2008-10-24 02:02:46+00:00,14.0,15.0,"MULTIPOINT (116.3266370000000052 39.9779920000000004, 116.3084029999999984 39.9810390000000027)" -10,1,2008-10-24 02:28:19+00:00,2008-10-24 02:31:32+00:00,15.0,16.0,"MULTIPOINT (116.3084029999999984 39.9810390000000027, 116.3101889999999941 39.9813740000000024)" -11,1,2008-10-24 03:16:35+00:00,2008-10-24 04:12:50+00:00,16.0,20.0,"MULTIPOINT (116.3101889999999941 39.9813740000000024, 116.3267620000000022 39.9779330000000002)" -12,1,2008-10-24 05:28:05+00:00,2008-10-24 05:39:50+00:00,20.0,21.0,"MULTIPOINT (116.3267620000000022 39.9779330000000002, 116.3112610000000018 39.9827119999999994)" -13,1,2008-10-24 06:08:42+00:00,2008-10-24 06:35:50+00:00,21.0,,"MULTIPOINT (116.3112610000000018 39.9827119999999994, 116.3270629999999954 39.9778990000000007)" +0,0,2008-10-23 02:53:04+00:00,2008-10-23 03:05:00+00:00,,0.0,"MULTIPOINT (116.318417 39.984702, 116.29872033333335 39.98398866666667)" +1,0,2008-10-23 04:08:07+00:00,2008-10-23 04:34:37+00:00,0.0,1.0,"MULTIPOINT (116.29872033333335 39.98398866666667, 116.324803 39.99972133333333)" +2,0,2008-10-23 09:42:25+00:00,2008-10-23 10:45:41+00:00,1.0,5.0,"MULTIPOINT (116.324803 39.99972133333333, 116.321629 40.00890557142857)" +3,0,2008-10-23 11:09:22+00:00,2008-10-23 11:10:57+00:00,5.0,6.0,"MULTIPOINT (116.321629 40.00890557142857, 116.32097166666667 40.00928)" +4,0,2008-10-24 02:09:59+00:00,2008-10-24 02:47:06+00:00,6.0,,"MULTIPOINT (116.32097166666667 40.00928, 116.321162 40.009209)" +5,1,2008-10-23 05:53:05+00:00,2008-10-23 06:01:42+00:00,,10.0,"MULTIPOINT (116.319236 39.984094, 116.32752433333334 39.978049666666664)" +6,1,2008-10-23 10:32:53+00:00,2008-10-23 11:10:19+00:00,10.0,11.0,"MULTIPOINT (116.32752433333334 39.978049666666664, 116.30641350000002 40.013802)" +7,1,2008-10-23 11:49:08+00:00,2008-10-23 11:49:48+00:00,11.0,12.0,"MULTIPOINT (116.30641350000002 40.013802, 116.30653043609027 40.01383060902254)" +8,1,2008-10-23 23:43:46+00:00,2008-10-24 00:23:03+00:00,12.0,14.0,"MULTIPOINT (116.30653043609027 40.01383060902254, 116.32663666666667 39.977995)" +9,1,2008-10-24 01:45:41+00:00,2008-10-24 02:02:46+00:00,14.0,15.0,"MULTIPOINT (116.32663666666667 39.977995, 116.3084012 39.981039200000005)" +10,1,2008-10-24 02:28:19+00:00,2008-10-24 02:31:32+00:00,15.0,16.0,"MULTIPOINT (116.3084012 39.981039200000005, 116.31014576923077 39.98138838461539)" +11,1,2008-10-24 03:16:35+00:00,2008-10-24 04:12:50+00:00,16.0,20.0,"MULTIPOINT (116.31014576923077 39.98138838461539, 116.32675288888888 39.97793)" +12,1,2008-10-24 05:28:05+00:00,2008-10-24 05:39:50+00:00,20.0,21.0,"MULTIPOINT (116.32675288888888 39.97793, 116.31125442857143 39.982705714285714)" +13,1,2008-10-24 06:08:42+00:00,2008-10-24 06:35:50+00:00,21.0,,"MULTIPOINT (116.31125442857143 39.982705714285714, 116.327063 39.977899)" diff --git a/tests/preprocessing/test_positionfixes.py b/tests/preprocessing/test_positionfixes.py index d63b1d85..ed5ed814 100644 --- a/tests/preprocessing/test_positionfixes.py +++ b/tests/preprocessing/test_positionfixes.py @@ -305,6 +305,24 @@ def test_unknown_distance_metric(self, example_positionfixes): ) +class Test__create_new_staypoints: + """Test __create_new_staypoints.""" + + def test_planar_crs(self, geolife_pfs_sp_long): + """Test if planar crs are handled as well""" + pfs, _ = geolife_pfs_sp_long + _, sp_wgs84 = pfs.as_positionfixes.generate_staypoints( + method="sliding", dist_threshold=100, time_threshold=5.0, include_last=True + ) + pfs = pfs.set_crs(2056, allow_override=True) + _, sp_lv95 = pfs.as_positionfixes.generate_staypoints( + method="sliding", dist_threshold=100, time_threshold=5.0, include_last=True + ) + sp_lv95.set_crs(4326, allow_override=True, inplace=True) + # planar and non-planar differ only if we experience a wrap in coords like [+180, -180] + assert_geodataframe_equal(sp_wgs84, sp_lv95, check_less_precise=True) + + class TestGenerate_triplegs: """Tests for generate_triplegs() method.""" diff --git a/tests/preprocessing/test_staypoints.py b/tests/preprocessing/test_staypoints.py index 5f50f4a2..0b7e86b0 100644 --- a/tests/preprocessing/test_staypoints.py +++ b/tests/preprocessing/test_staypoints.py @@ -7,7 +7,7 @@ import pytest from shapely.geometry import Point from sklearn.cluster import DBSCAN -from geopandas.testing import assert_geodataframe_equal +from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal import trackintel as ti from trackintel.geogr.distances import calculate_distance_matrix @@ -219,8 +219,7 @@ def test_dbscan_loc(self): other_locs = gpd.GeoDataFrame(other_locs, columns=["user_id", "id", "center"], geometry="center", crs=sp.crs) other_locs.set_index("id", inplace=True) - assert all(other_locs["center"] == locs["center"]) - assert all(other_locs.index == locs.index) + assert_geoseries_equal(other_locs["center"], locs["center"], check_less_precise=True) def test_dbscan_user_dataset(self): """Test user and dataset location generation.""" diff --git a/tests/preprocessing/test_triplegs.py b/tests/preprocessing/test_triplegs.py index 341eda26..1fe7cc0d 100644 --- a/tests/preprocessing/test_triplegs.py +++ b/tests/preprocessing/test_triplegs.py @@ -71,7 +71,7 @@ def test_generate_trips(self, example_triplegs_higher_gap_threshold): ["user_id", "started_at", "finished_at", "origin_staypoint_id", "destination_staypoint_id", "geom"] ] # test if generated trips are equal - assert_geodataframe_equal(trips_loaded, trips) + assert_geodataframe_equal(trips_loaded, trips, check_less_precise=True) def test_trip_wo_geom(self, example_triplegs_higher_gap_threshold): """Test if the add_geometry parameter shows correct behavior""" diff --git a/tests/preprocessing/test_util.py b/tests/preprocessing/test_util.py index cfca5aa6..426322da 100644 --- a/tests/preprocessing/test_util.py +++ b/tests/preprocessing/test_util.py @@ -1,10 +1,13 @@ import datetime +import geopandas as gpd +from geopandas.testing import assert_geoseries_equal import pandas as pd -from pandas.testing import assert_frame_equal import pytest +from pandas.testing import assert_frame_equal +from shapely.geometry import MultiPoint, Point -from trackintel.preprocessing.util import calc_temp_overlap, _explode_agg +from trackintel.preprocessing.util import _explode_agg, calc_temp_overlap, angle_centroid_multipoints @pytest.fixture @@ -88,3 +91,18 @@ def test_index_dtype_with_None(self): returned_df = _explode_agg("id", "c", orig_df, agg_df) solution_df = pd.DataFrame(orig) assert_frame_equal(returned_df, solution_df) + + +class TestAngleCentroidMultipoints: + """Test util method angle_centroid_multipoints""" + + # test adapted from https://rosettacode.org/wiki/Averages/Mean_angle + a = Point((130, 45)) + b = MultiPoint([(160, 10), (-170, 20)]) + c = MultiPoint([(20, 0), (30, 10), (40, 20)]) + d = MultiPoint([(350, 0), (10, 0)]) + e = MultiPoint([(90, 0), (180, 0), (270, 0), (360, 0)]) + g = gpd.GeoSeries([a, b, c, d, e]) + g_solution = gpd.GeoSeries([a, Point([175, 15]), Point([30, 10]), Point(0, 0), Point(-90, 0)]) + g = gpd.GeoSeries(angle_centroid_multipoints(g)) + assert_geoseries_equal(g, g_solution, check_less_precise=True) diff --git a/trackintel/preprocessing/positionfixes.py b/trackintel/preprocessing/positionfixes.py index 9adcb641..4a1bdae3 100644 --- a/trackintel/preprocessing/positionfixes.py +++ b/trackintel/preprocessing/positionfixes.py @@ -4,10 +4,10 @@ import geopandas as gpd import numpy as np import pandas as pd -from shapely.geometry import LineString, Point +from shapely.geometry import LineString -from trackintel.geogr.distances import haversine_dist -from trackintel.preprocessing.util import applyParallel, _explode_agg +from trackintel.geogr.distances import check_gdf_planar, haversine_dist +from trackintel.preprocessing.util import _explode_agg, angle_centroid_multipoints, applyParallel def generate_staypoints( @@ -411,7 +411,7 @@ def _generate_staypoints_sliding_user( y = df[geo_col].y.to_numpy() ret_sp = [] - start = 0 + curr = start = 0 for curr in range(1, len(df)): # the gap of two consecutive positionfixes should not be too long @@ -451,8 +451,12 @@ def __create_new_staypoints(start, end, pfs, elevation_flag, geo_col, last_flag= # if end is the last pfs, we want to include the info from it as well if last_flag: end = len(pfs) + points = pfs[geo_col].iloc[start:end].unary_union + if check_gdf_planar(pfs): + new_sp[geo_col] = points.centroid + else: + new_sp[geo_col] = angle_centroid_multipoints(points)[0] - new_sp[geo_col] = Point(pfs[geo_col].iloc[start:end].x.median(), pfs[geo_col].iloc[start:end].y.median()) if elevation_flag: new_sp["elevation"] = pfs["elevation"].iloc[start:end].median() new_sp["pfs_id"] = pfs.index[start:end].to_list() diff --git a/trackintel/preprocessing/staypoints.py b/trackintel/preprocessing/staypoints.py index 36b0bbd8..9b62793a 100644 --- a/trackintel/preprocessing/staypoints.py +++ b/trackintel/preprocessing/staypoints.py @@ -4,8 +4,8 @@ from sklearn.cluster import DBSCAN import warnings -from trackintel.geogr.distances import meters_to_decimal_degrees -from trackintel.preprocessing.util import applyParallel +from trackintel.geogr.distances import meters_to_decimal_degrees, check_gdf_planar +from trackintel.preprocessing.util import applyParallel, angle_centroid_multipoints def generate_locations( @@ -136,9 +136,11 @@ def generate_locations( # filter staypoints not belonging to locations locs = locs.loc[locs["location_id"] != -1] - with warnings.catch_warnings(): # TODO: fix bug for geographic crs #437 - warnings.simplefilter("ignore", category=UserWarning) - locs["center"] = locs.geometry.centroid # creates warning for geographic crs + if check_gdf_planar(locs): + locs["center"] = locs.geometry.centroid + else: + # error of wrapping e.g. mean([-180, +180]) -> own function needed + locs["center"] = angle_centroid_multipoints(locs.geometry) # extent is the convex hull of the geometry locs["extent"] = locs.geometry.convex_hull @@ -154,7 +156,7 @@ def generate_locations( else: locs.loc[pointLine_idx, "extent"] = locs.loc[pointLine_idx, "extent"].buffer(epsilon) - locs = locs.set_geometry("center") + locs = locs.set_geometry("center", crs=sp.crs) locs = locs[["user_id", "location_id", "center", "extent"]] # index management diff --git a/trackintel/preprocessing/util.py b/trackintel/preprocessing/util.py index ab74ea95..a1f9c31a 100644 --- a/trackintel/preprocessing/util.py +++ b/trackintel/preprocessing/util.py @@ -1,6 +1,11 @@ from datetime import timedelta + +import geopandas as gpd +import numpy as np import pandas as pd +import pygeos from joblib import Parallel, delayed +from shapely.geometry.base import BaseGeometry from tqdm import tqdm @@ -103,3 +108,33 @@ def _explode_agg(column, agg, orig_df, agg_df): temp = temp[temp[column].notna()] temp.index = temp[column] return orig_df.join(temp[agg], how="left") + + +def angle_centroid_multipoints(geometry): + """Calculate the mean of angles of MultiPoints + + Parameters + ---------- + geometry : GeoSeries, shapely.geometry.Point, shapely.geometry.MultiPoint + Should contain only Points or MultiPoints any other lead to wrong results. + + Returns + ------- + geopandas.GeometryArray + Centroid of geometries (shapely.Point) + """ + g = pygeos.from_shapely(geometry) + g, index = pygeos.get_coordinates(g, return_index=True) + # number of coordinate pairs per MultiPoint + count = np.bincount(index) + x, y = g[:, 0], g[:, 1] + # calculate mean of y Coordinates -> no wrapping + y = np.bincount(index, weights=y) / count + # calculate mean of x Coordinates with wrapping + x_rad = np.deg2rad(x) + x_sin = np.bincount(index, weights=np.sin(x_rad)) / count + x_cos = np.bincount(index, weights=np.cos(x_rad)) / count + x = np.rad2deg(np.arctan2(x_sin, x_cos)) + # shapely Geometry has no crs information + crs = None if isinstance(geometry, BaseGeometry) else geometry.crs + return gpd.points_from_xy(x, y, crs=crs)