From 3c0900e4098b88a0cbf25eae4b9f9165d6ca164b Mon Sep 17 00:00:00 2001 From: Ye Hong Date: Sun, 4 Apr 2021 17:57:10 +0200 Subject: [PATCH 1/3] TST: add tests for visualization --- tests/preprocessing/test_staypoints.py | 15 ++++- tests/test_core.py | 11 ++++ tests/test_staypoints.py | 21 ------- tests/test_visualization.py | 61 ------------------- tests/visualization/test_locations.py | 60 ++++++++++++++++++ tests/visualization/test_positionfixes.py | 20 ++++++ tests/visualization/test_staypoints.py | 46 ++++++++++++++ tests/visualization/test_triplegs.py | 52 ++++++++++++++++ tests/visualization/test_util.py | 27 +++++++- .../analysis/transport_mode_identification.py | 34 +++++------ trackintel/core.py | 2 +- trackintel/preprocessing/staypoints.py | 22 +++---- trackintel/visualization/util.py | 5 +- 13 files changed, 259 insertions(+), 117 deletions(-) create mode 100644 tests/test_core.py delete mode 100644 tests/test_staypoints.py delete mode 100644 tests/test_visualization.py create mode 100644 tests/visualization/test_locations.py create mode 100644 tests/visualization/test_positionfixes.py create mode 100644 tests/visualization/test_staypoints.py create mode 100644 tests/visualization/test_triplegs.py diff --git a/tests/preprocessing/test_staypoints.py b/tests/preprocessing/test_staypoints.py index d63f35a6..ff3501e3 100644 --- a/tests/preprocessing/test_staypoints.py +++ b/tests/preprocessing/test_staypoints.py @@ -11,6 +11,8 @@ class TestGenerate_locations: + """Tests for generate_locations() method.""" + def test_generate_locations_dbscan_hav_euc(self): stps_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") stps = ti.read_staypoints_csv(stps_file, tz="utc", index_col="id") @@ -172,4 +174,15 @@ def test_generate_locations_index_start(self): class TestCreate_activity_flag: - pass + """Tests for create_activity_flag() method.""" + + def test_create_activity_flag(self): + spts_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") + spts_test = ti.read_staypoints_csv(spts_file, tz="utc", index_col="id") + + activity_true = spts_test["activity"].copy() + spts_test["activity"] = False + + spts_test = spts_test.as_staypoints.create_activity_flag() + + pd.testing.assert_series_equal(spts_test["activity"], activity_true) diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 00000000..4f5ceef6 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,11 @@ +import trackintel as ti + + +class TestPrint_version: + """Tests for print_version() method.""" + + def test_print_version(self, capsys): + """Check if the correct message is printed.""" + ti.print_version() + captured = capsys.readouterr() + assert "This is trackintel v" in captured.out diff --git a/tests/test_staypoints.py b/tests/test_staypoints.py deleted file mode 100644 index 9cb6bc4a..00000000 --- a/tests/test_staypoints.py +++ /dev/null @@ -1,21 +0,0 @@ -import trackintel as ti -import os -import pandas as pd -import os - -import pandas as pd - -import trackintel as ti - - -class TestCreate_activity_flag: - def test_create_activity_flag(self): - spts_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") - spts_test = ti.read_staypoints_csv(spts_file, tz="utc", index_col="id") - - activity_true = spts_test["activity"].copy() - spts_test["activity"] = False - - spts_test = spts_test.as_staypoints.create_activity_flag() - - pd.testing.assert_series_equal(spts_test["activity"], activity_true) diff --git a/tests/test_visualization.py 
b/tests/test_visualization.py
deleted file mode 100644
index d8f4d888..00000000
--- a/tests/test_visualization.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import pytest
-import os
-import matplotlib as mpl
-
-mpl.use("Agg")
-
-import trackintel as ti
-
-
-class TestIO:
-    def test_positionfixes_plot(self):
-        """Use trackintel visualization function to plot positionfixes and check if file exists."""
-
-        tmp_file = os.path.join("tests", "data", "positionfixes_plot.png")
-        pfs_file = os.path.join("tests", "data", "positionfixes.csv")
-        pfs = ti.read_positionfixes_csv(pfs_file, sep=";", index_col="id", crs="EPSG:4326")
-        pfs.as_positionfixes.plot(out_filename=tmp_file, plot_osm=False)
-        assert os.path.exists(tmp_file)
-        os.remove(tmp_file)
-
-    def test_triplegs_plot(self):
-        """Use trackintel visualization function to plot triplegs and check if file exists."""
-
-        tmp_file = os.path.join("tests", "data", "triplegs_plot.png")
-        pfs_file = os.path.join("tests", "data", "positionfixes.csv")
-        pfs = ti.read_positionfixes_csv(pfs_file, sep=";", index_col="id", crs="EPSG:4326")
-
-        tpls_file = os.path.join("tests", "data", "triplegs.csv")
-        tpls = ti.read_triplegs_csv(tpls_file, sep=";", index_col="id", crs="EPSG:4326")
-        tpls.as_triplegs.plot(out_filename=tmp_file, positionfixes=pfs, plot_osm=False)
-        assert os.path.exists(tmp_file)
-        os.remove(tmp_file)
-
-    def test_staypoints_plot(self):
-        """Use trackintel visualization function to plot staypoints and check if file exists."""
-
-        tmp_file = os.path.join("tests", "data", "staypoints_plot.png")
-        pfs_file = os.path.join("tests", "data", "positionfixes.csv")
-        pfs = ti.read_positionfixes_csv(pfs_file, sep=";", index_col="id", crs="EPSG:4326")
-
-        stps_file = os.path.join("tests", "data", "staypoints.csv")
-        stps = ti.read_staypoints_csv(stps_file, sep=";", index_col="id", crs="EPSG:4326")
-        stps.as_staypoints.plot(out_filename=tmp_file, radius=0.01, positionfixes=pfs, plot_osm=False)
-        assert os.path.exists(tmp_file)
-        os.remove(tmp_file)
-
-    def test_locations_plot(self):
-        """Use trackintel visualization function to plot locations and check if file exists."""
-
-        tmp_file = os.path.join("tests", "data", "locations_plot.png")
-        pfs_file = os.path.join("tests", "data", "positionfixes.csv")
-        pfs = pfs = ti.read_positionfixes_csv(pfs_file, sep=";", index_col="id", crs="EPSG:4326")
-        stps_file = os.path.join("tests", "data", "staypoints.csv")
-        stps = ti.read_staypoints_csv(stps_file, sep=";", index_col="id", crs="EPSG:4326")
-        locs_file = os.path.join("tests", "data", "locations.csv")
-        locs = ti.read_locations_csv(locs_file, sep=";", index_col="id", crs="EPSG:4326")
-        locs.as_locations.plot(
-            out_filename=tmp_file, radius=120, positionfixes=pfs, staypoints=stps, staypoints_radius=100, plot_osm=False
-        )
-        assert os.path.exists(tmp_file)
-        os.remove(tmp_file)
diff --git a/tests/visualization/test_locations.py b/tests/visualization/test_locations.py
new file mode 100644
index 00000000..1fd9831b
--- /dev/null
+++ b/tests/visualization/test_locations.py
@@ -0,0 +1,60 @@
+import pytest
+import os
+import matplotlib as mpl
+
+mpl.use("Agg")
+
+import trackintel as ti
+from trackintel.visualization.util import regular_figure
+
+
+@pytest.fixture
+def test_data():
+    """Read test data from files."""
+    pfs_file = os.path.join("tests", "data", "positionfixes.csv")
+    pfs = ti.read_positionfixes_csv(pfs_file, sep=";", index_col="id", crs="EPSG:4326")
+    stps_file = os.path.join("tests", "data", "staypoints.csv")
+    stps = ti.read_staypoints_csv(stps_file, sep=";", index_col="id", crs="EPSG:4326")
index_col="id", crs="EPSG:4326") + locs_file = os.path.join("tests", "data", "locations.csv") + locs = ti.read_locations_csv(locs_file, sep=";", index_col="id", crs="EPSG:4326") + return pfs, stps, locs + + +class TestPlot_locations: + """Tests for plot_locations() method.""" + + def test_locations_plot(self, test_data): + """Use trackintel visualization function to plot locations and check if the file exists.""" + pfs, stps, locs = test_data + tmp_file = os.path.join("tests", "data", "locations_plot.png") + locs.as_locations.plot( + out_filename=tmp_file, radius=120, positionfixes=pfs, staypoints=stps, staypoints_radius=100, plot_osm=False + ) + assert os.path.exists(tmp_file) + os.remove(tmp_file) + + def test_axis(self, test_data): + """Test the use of regular_figure() to create axis.""" + pfs, _, locs = test_data + tmp_file = os.path.join("tests", "data", "locations_plot.png") + _, ax = regular_figure() + + locs.as_locations.plot( + out_filename=tmp_file, + radius=120, + positionfixes=pfs, + plot_osm=False, + axis=ax, + ) + assert os.path.exists(tmp_file) + os.remove(tmp_file) + + def test_parameter(self, test_data): + """Test other parameter configurations.""" + pfs, _, locs = test_data + tmp_file = os.path.join("tests", "data", "locations_plot.png") + + # plot only location + locs.as_locations.plot(out_filename=tmp_file, plot_osm=True) + assert os.path.exists(tmp_file) + os.remove(tmp_file) \ No newline at end of file diff --git a/tests/visualization/test_positionfixes.py b/tests/visualization/test_positionfixes.py new file mode 100644 index 00000000..85b93edd --- /dev/null +++ b/tests/visualization/test_positionfixes.py @@ -0,0 +1,20 @@ +import pytest +import os +import matplotlib as mpl + +mpl.use("Agg") + +import trackintel as ti + + +class TestPlot_positionfixes: + """Tests for plot_positionfixes() method.""" + + def test_positionfixes_plot(self): + """Use trackintel visualization function to plot positionfixes and check if the file exists.""" + tmp_file = os.path.join("tests", "data", "positionfixes_plot.png") + pfs_file = os.path.join("tests", "data", "positionfixes.csv") + pfs = ti.read_positionfixes_csv(pfs_file, sep=";", index_col="id", crs="EPSG:4326") + pfs.as_positionfixes.plot(out_filename=tmp_file, plot_osm=False) + assert os.path.exists(tmp_file) + os.remove(tmp_file) \ No newline at end of file diff --git a/tests/visualization/test_staypoints.py b/tests/visualization/test_staypoints.py new file mode 100644 index 00000000..c5b09e11 --- /dev/null +++ b/tests/visualization/test_staypoints.py @@ -0,0 +1,46 @@ +import pytest +import os +import matplotlib as mpl + +mpl.use("Agg") + +import trackintel as ti + + +@pytest.fixture +def test_data(): + """Read tests data from files.""" + pfs_file = os.path.join("tests", "data", "positionfixes.csv") + pfs = ti.read_positionfixes_csv(pfs_file, sep=";", index_col="id", crs="EPSG:4326") + + stps_file = os.path.join("tests", "data", "staypoints.csv") + stps = ti.read_staypoints_csv(stps_file, sep=";", index_col="id", crs="EPSG:4326") + return pfs, stps + + +class TestPlot_staypoints: + """Tests for plot_staypoints() method.""" + + def test_staypoints_plot(self, test_data): + """Use trackintel visualization function to plot staypoints and check if the file exists.""" + pfs, stps = test_data + tmp_file = os.path.join("tests", "data", "staypoints_plot.png") + + stps.as_staypoints.plot(out_filename=tmp_file, radius=0.01, positionfixes=pfs, plot_osm=False) + assert os.path.exists(tmp_file) + os.remove(tmp_file) + + def test_parameter(self, 
+    def test_parameter(self, test_data):
+        """Test other parameter configurations."""
+        pfs, stps = test_data
+        tmp_file = os.path.join("tests", "data", "staypoints_plot.png")
+
+        # no radius
+        stps.as_staypoints.plot(out_filename=tmp_file, positionfixes=pfs, plot_osm=False)
+        assert os.path.exists(tmp_file)
+        os.remove(tmp_file)
+
+        # with osm
+        stps.as_staypoints.plot(out_filename="staypoints_plot", plot_osm=True)
+        assert os.path.exists("staypoints_plot.png")
+        os.remove("staypoints_plot.png")
\ No newline at end of file
diff --git a/tests/visualization/test_triplegs.py b/tests/visualization/test_triplegs.py
new file mode 100644
index 00000000..a327ae1e
--- /dev/null
+++ b/tests/visualization/test_triplegs.py
@@ -0,0 +1,52 @@
+import pytest
+import os
+import matplotlib as mpl
+
+mpl.use("Agg")
+
+import trackintel as ti
+from trackintel.visualization.util import regular_figure
+
+
+@pytest.fixture
+def test_data():
+    """Read test data from files."""
+    pfs_file = os.path.join("tests", "data", "positionfixes.csv")
+    pfs = ti.read_positionfixes_csv(pfs_file, sep=";", index_col="id", crs="EPSG:4326")
+
+    tpls_file = os.path.join("tests", "data", "triplegs.csv")
+    tpls = ti.read_triplegs_csv(tpls_file, sep=";", index_col="id", crs="EPSG:4326")
+    return pfs, tpls
+
+
+class TestPlot_triplegs:
+    """Tests for plot_triplegs() method."""
+
+    def test_triplegs_plot(self, test_data):
+        """Use trackintel visualization function to plot triplegs and check if the file exists."""
+        pfs, tpls = test_data
+
+        tmp_file = os.path.join("tests", "data", "triplegs_plot.png")
+        tpls.as_triplegs.plot(out_filename=tmp_file, positionfixes=pfs, plot_osm=False)
+        assert os.path.exists(tmp_file)
+        os.remove(tmp_file)
+
+    def test_axis(self, test_data):
+        """Test the use of regular_figure() to create axis."""
+        _, tpls = test_data
+        tmp_file = os.path.join("tests", "data", "triplegs_plot.png")
+        _, ax = regular_figure()
+
+        tpls.as_triplegs.plot(out_filename=tmp_file, axis=ax)
+        assert os.path.exists(tmp_file)
+        os.remove(tmp_file)
+
+    def test_parameter(self, test_data):
+        """Test other parameter configurations."""
+        _, tpls = test_data
+        tmp_file = os.path.join("tests", "data", "triplegs_plot.png")
+
+        # test plot_osm
+        tpls.as_triplegs.plot(out_filename=tmp_file, plot_osm=True)
+        assert os.path.exists(tmp_file)
+        os.remove(tmp_file)
\ No newline at end of file
diff --git a/tests/visualization/test_util.py b/tests/visualization/test_util.py
index b44af16a..b6c7a650 100644
--- a/tests/visualization/test_util.py
+++ b/tests/visualization/test_util.py
@@ -1,14 +1,17 @@
 import os
 import pytest
 from geopandas.testing import assert_geodataframe_equal
+import numpy as np

 import trackintel as ti
+from trackintel.visualization.util import a4_figsize


 class TestTransform_gdf_to_wgs84:
+    """Tests for transform_gdf_to_wgs84() method."""
+
     def test_transformation(self):
         """Check if data gets transformed."""
-
         file = os.path.join("tests", "data", "positionfixes.csv")
         pfs = ti.read_positionfixes_csv(file, sep=";", crs="EPSG:4326", index_col=None)
         pfs_2056 = pfs.to_crs("EPSG:2056")
@@ -17,8 +20,28 @@ def test_transformation(self):

     def test_crs_warning(self):
         """Check if warning is raised for data without crs."""
-
         file = os.path.join("tests", "data", "positionfixes.csv")
         pfs = ti.read_positionfixes_csv(file, sep=";", crs=None, index_col=None)
         with pytest.warns(UserWarning):
             ti.visualization.util.transform_gdf_to_wgs84(pfs)
+
+
+class TestA4_figsize:
+    """Tests for a4_figsize() method."""
+
+    def test_parameter(self, caplog):
+        """Test different parameter
configurations.""" + fig_width, fig_height = a4_figsize(columns=1) + assert np.allclose([3.30708661, 2.04389193], [fig_width, fig_height]) + + fig_width, fig_height = a4_figsize(columns=1.5) + assert np.allclose([5.07874015, 3.13883403], [fig_width, fig_height]) + + fig_width, fig_height = a4_figsize(columns=2) + assert np.allclose([6.85039370, 4.23377614], [fig_width, fig_height]) + + with pytest.raises(ValueError): + a4_figsize(columns=3) + + a4_figsize(fig_height_mm=250) + assert "fig_height too large" in caplog.text diff --git a/trackintel/analysis/transport_mode_identification.py b/trackintel/analysis/transport_mode_identification.py index afb75888..99bf15f5 100644 --- a/trackintel/analysis/transport_mode_identification.py +++ b/trackintel/analysis/transport_mode_identification.py @@ -6,7 +6,7 @@ def predict_transport_mode(triplegs, method="simple-coarse", **kwargs): """ Predict the transport mode of triplegs. - + Predict/impute the transport mode that was likely chosen to cover the given tripleg, e.g., car, bicycle, or walk. @@ -14,22 +14,22 @@ def predict_transport_mode(triplegs, method="simple-coarse", **kwargs): ---------- method: {'simple-coarse'} The following methods are available for transport mode inference/prediction: - - - 'simple-coarse' : Uses simple heuristics to predict coarse transport classes. - + + - 'simple-coarse' : Uses simple heuristics to predict coarse transport classes. + Returns ------- triplegs : GeoDataFrame (as trackintel triplegs) The triplegs with added column mode, containing the predicted transport modes. - + Notes ----- - ``simple-coarse`` method includes ``{'slow_mobility', 'motorized_mobility', 'fast_mobility'}``. - In the default classification, ``slow_mobility`` (<15 km/h) includes transport modes such as - walking or cycling, ``motorized_mobility`` (<100 km/h) modes such as car or train, and - ``fast_mobility`` (>100 km/h) modes such as high-speed rail or airplanes. + ``simple-coarse`` method includes ``{'slow_mobility', 'motorized_mobility', 'fast_mobility'}``. + In the default classification, ``slow_mobility`` (<15 km/h) includes transport modes such as + walking or cycling, ``motorized_mobility`` (<100 km/h) modes such as car or train, and + ``fast_mobility`` (>100 km/h) modes such as high-speed rail or airplanes. These categories are default values and can be overwritten using the keyword argument categories. - + """ if method == "simple-coarse": # implemented as keyword argument if later other methods that don't use categories are added @@ -37,25 +37,25 @@ def predict_transport_mode(triplegs, method="simple-coarse", **kwargs): "categories", {15 / 3.6: "slow_mobility", 100 / 3.6: "motorized_mobility", np.inf: "fast_mobility"} ) - return predict_transport_mode_simple_coarse(triplegs, categories) + return _predict_transport_mode_simple_coarse(triplegs, categories) else: raise NameError(f"Method {method} not known for predicting tripleg transport modes.") -def predict_transport_mode_simple_coarse(triplegs_in, categories): +def _predict_transport_mode_simple_coarse(triplegs_in, categories): """ - Predict a transport mode out of three coarse classes. - - Implements a simple speed based heuristic (over the whole tripleg). + Predict a transport mode out of three coarse classes. + + Implements a simple speed based heuristic (over the whole tripleg). As such, it is very fast, but also very simple and coarse. Parameters ---------- triplegs : trackintel triplegs GeoDataFrame The triplegs for the transport mode prediction. 
-
+
    categories : dict, optional
-        The categories for the speed classification {upper_boundary:'category_name'}.
+        The categories for the speed classification {upper_boundary:'category_name'}.
         The unit for the upper boundary is m/s.
         The default is {15/3.6: 'slow_mobility', 100/3.6: 'motorized_mobility', np.inf: 'fast_mobility'}.
diff --git a/trackintel/core.py b/trackintel/core.py
index 1a54459f..52f41825 100644
--- a/trackintel/core.py
+++ b/trackintel/core.py
@@ -2,7 +2,7 @@


 def print_version():
-    """Prints the framework version."""
+    """Print the framework version."""

     print(
         "This is trackintel v"
diff --git a/trackintel/preprocessing/staypoints.py b/trackintel/preprocessing/staypoints.py
index 85de7e41..d8390d4c 100644
--- a/trackintel/preprocessing/staypoints.py
+++ b/trackintel/preprocessing/staypoints.py
@@ -21,34 +21,34 @@ def generate_locations(
     method : {'dbscan'}
         Method to create locations.
-
+
         - 'dbscan' : Uses the DBSCAN algorithm to cluster staypoints.

     epsilon : float, default 100
-        The epsilon for the 'dbscan' method. if 'distance_matrix_metric' is 'haversine'
+        The epsilon for the 'dbscan' method. If 'distance_matrix_metric' is 'haversine'
         or 'euclidean', the unit is in meters.

     num_samples : int, default 1
-        The minimal number of samples in a cluster.
+        The minimum number of samples in a cluster.

     distance_matrix_metric: {'haversine', 'euclidean'}
-        The distance matrix used by the applied method. Any mentioned below are possible:
+        The distance matrix used by the applied method. Any of the metrics listed below are possible:
         https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise_distances.html
-
+
     agg_level: {'user','dataset'}
         The level of aggregation when generating locations:
-
+
         - 'user' : locations are generated independently per-user.
         - 'dataset' : shared locations are generated for all users.
-
+
     Returns
     -------
     ret_sp: GeoDataFrame (as trackintel staypoints)
         The original staypoints with a new column ``[`location_id`]``.
-
+
     ret_loc: GeoDataFrame (as trackintel locations)
-        The generated locations.
-
+        The generated locations.

     Examples
     --------
     >>> spts.as_staypoints.generate_locations(method='dbscan', epsilon=100, num_samples=1)
@@ -189,7 +189,7 @@ def create_activity_flag(staypoints, method="time_threshold", time_threshold=5.0
     time_threshold : float, default = 5 (minutes)
         The time threshold for which a staypoint is considered an activity in minutes. Used by method 'time_threshold'
-
+
     activity_column_name : str , default = 'activity'
         The name of the newly created column that holds the activity flag.
diff --git a/trackintel/visualization/util.py b/trackintel/visualization/util.py
index 8fe74e7f..390cafe6 100644
--- a/trackintel/visualization/util.py
+++ b/trackintel/visualization/util.py
@@ -15,7 +15,7 @@


 def a4_figsize(fig_height_mm=None, columns=2):
-    """Generates sizes for a figure that fits on an A4 page.
+    """Generate sizes for a figure that fits on an A4 page.

     The sizes are taken from:
     http://www.springer.com/computer/journal/450 > Artwork and Illustrations Guidelines > Figure Placement and Size
@@ -33,7 +33,6 @@
     (float, float)
         The width and height in which to plot a figure to fit on an A4 sheet.
     """
-
    if columns == 1:
        fig_width_mm = 84.0
    elif columns == 1.5:
        fig_width_mm = 129.0
@@ -122,7 +121,7 @@ def save_fig(out_filename, tight="tight", formats=["png", "pdf"]):


 def transform_gdf_to_wgs84(gdf):
-    """Transforms a GeoDataFrame into WGS84.
+    """Transform a GeoDataFrame into WGS84.

     Additionally checks if data has CRS or is already in WGS84.
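For reference, the figure sizes asserted in TestA4_figsize above follow from fixed column widths and a golden-ratio height. A minimal sketch of the computation (the width table and the golden-ratio choice are assumptions inferred from the Springer guideline linked in the a4_figsize docstring and from the expected test values, which convert back to exactly these widths at 25.4 mm per inch):

import numpy as np

def a4_figsize_sketch(columns=2):
    # Assumed column widths in mm; e.g. 84.0 mm / 25.4 = 3.30708661 in,
    # matching the value asserted in test_parameter above.
    widths_mm = {1: 84.0, 1.5: 129.0, 2: 174.0}
    if columns not in widths_mm:
        raise ValueError("columns must be 1, 1.5 or 2")
    fig_width = widths_mm[columns] / 25.4     # mm -> inch
    golden_mean = (np.sqrt(5.0) - 1.0) / 2.0  # aesthetic height/width ratio
    return fig_width, fig_width * golden_mean

assert np.allclose(a4_figsize_sketch(1), [3.30708661, 2.04389193])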
From c375d806c77071360e0af92ebf29076f329a524e Mon Sep 17 00:00:00 2001
From: Ye Hong
Date: Sun, 4 Apr 2021 18:05:21 +0200
Subject: [PATCH 2/3] CLN: change to stps

---
 docs/tutorial.rst                          |  4 +-
 examples/preprocess_trajectories.py        | 24 +++---
 tests/analysis/test_modal_split.py         |  6 +-
 tests/geogr/test_distances.py              | 26 +++---
 tests/io/test_dataset_reader.py            |  4 +-
 tests/preprocessing/test_filter.py         | 24 +++---
 tests/preprocessing/test_positionfixes.py  |  2 +-
 tests/preprocessing/test_staypoints.py     | 12 +--
 tests/preprocessing/test_triplegs.py       | 12 +--
 tests/visualization/test_modal_split.py    |  4 +-
 trackintel/analysis/tracking_quality.py    |  2 +-
 trackintel/preprocessing/filter.py         |  2 +-
 trackintel/preprocessing/positionfixes.py  | 30 ++++----
 trackintel/preprocessing/staypoints.py     |  8 +-
 trackintel/preprocessing/triplegs.py       | 90 ++++++++++++-----------
 15 files changed, 128 insertions(+), 122 deletions(-)

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 75cb40a7..c4393da7 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -52,10 +52,10 @@
 This will additionally plot the original positionfixes, as well as the underlying
 street network from OSM. We can for example continue by extracting and plotting
 locations (locations that "contain" multiple staypoints, i.e., are visited often by a user)::

-    locs = spts.as_staypoints.extract_locations(method='dbscan',
+    locs = stps.as_staypoints.extract_locations(method='dbscan',
         epsilon=meters_to_decimal_degrees(120, 47.5), num_samples=3)
     locs.as_locations.plot(out_filename='locations.png',
-        radius=meters_to_decimal_degrees(120, 47.5), positionfixes=pfs, staypoints=spts,
+        radius=meters_to_decimal_degrees(120, 47.5), positionfixes=pfs, staypoints=stps,
         staypoints_radius=meters_to_decimal_degrees(100, 47.5), plot_osm=True)

 This will extract locations and plot them to a file called ``locations.png``, additionally
diff --git a/examples/preprocess_trajectories.py b/examples/preprocess_trajectories.py
index 12da0299..20db6f10 100644
--- a/examples/preprocess_trajectories.py
+++ b/examples/preprocess_trajectories.py
@@ -17,39 +17,39 @@
 pfs = ti.read_positionfixes_csv("examples/data/geolife_trajectory.csv", sep=";", crs="EPSG:4326", index_col=None)
 pfs.as_positionfixes.plot(out_filename="examples/out/gps_trajectory_positionfixes.png", plot_osm=True)

-_, spts = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=100, time_threshold=5)
-spts.as_staypoints.plot(
+_, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=100, time_threshold=5)
+stps.as_staypoints.plot(
     out_filename="examples/out/gps_trajectory_staypoints.png", radius=100, positionfixes=pfs, plot_osm=True
 )

-_, locs = spts.as_staypoints.generate_locations(method="dbscan", epsilon=0.01, num_samples=3)
+_, locs = stps.as_staypoints.generate_locations(method="dbscan", epsilon=0.01, num_samples=3)
 locs.as_locations.plot(
     out_filename="examples/out/gps_trajectory_locations.png",
     radius=120,
     positionfixes=pfs,
-    staypoints=spts,
+    staypoints=stps,
     staypoints_radius=100,
     plot_osm=True,
 )

-_, tpls = pfs.as_positionfixes.generate_triplegs(stps_input=spts)
+_, tpls = pfs.as_positionfixes.generate_triplegs(stps_input=stps)
 tpls.as_triplegs.plot(
-    out_filename="examples/out/gpsies_trajectory_triplegs.png", staypoints=spts, staypoints_radius=100, plot_osm=True
+    out_filename="examples/out/gpsies_trajectory_triplegs.png", staypoints=stps, staypoints_radius=100, plot_osm=True
 )

 # Geolife trajectory.
pfs = ti.read_positionfixes_csv("examples/data/geolife_trajectory.csv", sep=";", crs="EPSG:4326", index_col=None) pfs.as_positionfixes.plot(out_filename="examples/out/geolife_trajectory_positionfixes.png", plot_osm=False) -_, spts = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=100, time_threshold=10) -spts.as_staypoints.plot( +_, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=100, time_threshold=10) +stps.as_staypoints.plot( out_filename="examples/out/geolife_trajectory_staypoints.png", radius=100, positionfixes=pfs, plot_osm=True ) # Google trajectory. pfs = ti.read_positionfixes_csv("examples/data/google_trajectory.csv", sep=";", crs="EPSG:4326", index_col=None) -_, spts = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=75, time_threshold=10) -spts.as_staypoints.plot( +_, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=75, time_threshold=10) +stps.as_staypoints.plot( out_filename="examples/out/google_trajectory_staypoints.png", radius=75, positionfixes=pfs, plot_osm=True ) @@ -57,7 +57,7 @@ pfs = ti.read_positionfixes_csv( "examples/data/posmo_trajectory.csv", sep=";", crs="EPSG:4326", index_col=None, tz="UTC" ) -_, spts = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=50, time_threshold=1) -spts.as_staypoints.plot( +_, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=50, time_threshold=1) +stps.as_staypoints.plot( out_filename="examples/out/posmo_trajectory_staypoints.png", radius=50, positionfixes=pfs, plot_osm=False ) diff --git a/tests/analysis/test_modal_split.py b/tests/analysis/test_modal_split.py index 001a9db2..e0cca0b6 100644 --- a/tests/analysis/test_modal_split.py +++ b/tests/analysis/test_modal_split.py @@ -15,8 +15,8 @@ def read_geolife_with_modes(): pfs, labels = read_geolife(os.path.join("tests", "data", "geolife_modes")) - pfs, spts = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5) - _, tpls = pfs.as_positionfixes.generate_triplegs(spts, method="between_staypoints") + pfs, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5) + _, tpls = pfs.as_positionfixes.generate_triplegs(stps, method="between_staypoints") tpls_with_modes = geolife_add_modes_to_triplegs(tpls, labels) return tpls_with_modes @@ -36,7 +36,7 @@ def ls_long(): @pytest.fixture def test_triplegs_modal_split(ls_short, ls_long): - """ Triplegs with transport modes that can be aggregated over days and weeks. + """Triplegs with transport modes that can be aggregated over days and weeks. 
user 0: day 1: 2 triplegs (car + bike) day 2: 1 tripleg (walk) diff --git a/tests/geogr/test_distances.py b/tests/geogr/test_distances.py index 588491f2..a306567a 100644 --- a/tests/geogr/test_distances.py +++ b/tests/geogr/test_distances.py @@ -59,11 +59,11 @@ class TestCalculate_distance_matrix: """Tests for the calculate_distance_matrix() function.""" def test_shape_for_different_array_length(self): - spts_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") - spts = ti.read_staypoints_csv(spts_file, tz="utc", index_col="id") + stps_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") + stps = ti.read_staypoints_csv(stps_file, tz="utc", index_col="id") - x = spts.iloc[0:5] - y = spts.iloc[5:15] + x = stps.iloc[0:5] + y = stps.iloc[5:15] d_euc1 = calculate_distance_matrix(X=x, Y=y, dist_metric="euclidean") d_euc2 = calculate_distance_matrix(X=y, Y=x, dist_metric="euclidean") @@ -76,11 +76,11 @@ def test_shape_for_different_array_length(self): assert np.isclose(0, np.sum(np.abs(d_hav1 - d_hav2.T))) def test_keyword_combinations(self): - spts_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") - spts = ti.read_staypoints_csv(spts_file, tz="utc", index_col="id") + stps_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") + stps = ti.read_staypoints_csv(stps_file, tz="utc", index_col="id") - x = spts.iloc[0:5] - y = spts.iloc[5:15] + x = stps.iloc[0:5] + y = stps.iloc[5:15] _ = calculate_distance_matrix(X=x, Y=y, dist_metric="euclidean", n_jobs=-1) _ = calculate_distance_matrix(X=y, Y=x, dist_metric="haversine", n_jobs=-1) @@ -92,12 +92,12 @@ def test_keyword_combinations(self): assert np.array_equal(d_euc, d_mink2) def test_compare_haversine_to_scikit_xy(self): - spts_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") - spts = ti.read_staypoints_csv(spts_file, tz="utc", index_col="id") - our_d_matrix = calculate_distance_matrix(X=spts, Y=spts, dist_metric="haversine") + stps_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") + stps = ti.read_staypoints_csv(stps_file, tz="utc", index_col="id") + our_d_matrix = calculate_distance_matrix(X=stps, Y=stps, dist_metric="haversine") - x = spts.geometry.x.values - y = spts.geometry.y.values + x = stps.geometry.x.values + y = stps.geometry.y.values x_rad = np.asarray([radians(_) for _ in x]) y_rad = np.asarray([radians(_) for _ in y]) diff --git a/tests/io/test_dataset_reader.py b/tests/io/test_dataset_reader.py index bc030e00..93be32d3 100644 --- a/tests/io/test_dataset_reader.py +++ b/tests/io/test_dataset_reader.py @@ -17,8 +17,8 @@ def read_geolife_modes(): @pytest.fixture def read_geolife_triplegs_with_modes(read_geolife_modes): pfs, labels = read_geolife_modes - pfs, spts = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5) - _, tpls = pfs.as_positionfixes.generate_triplegs(spts, method="between_staypoints") + pfs, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5) + _, tpls = pfs.as_positionfixes.generate_triplegs(stps, method="between_staypoints") return tpls, labels diff --git a/tests/preprocessing/test_filter.py b/tests/preprocessing/test_filter.py index 919d030c..41ec8d16 100644 --- a/tests/preprocessing/test_filter.py +++ b/tests/preprocessing/test_filter.py @@ -10,11 +10,11 @@ def locs_from_geolife(): """Create locations from geolife staypoints.""" # read staypoints - spts_file = os.path.join("tests", "data", 
"geolife", "geolife_staypoints.csv") - spts = ti.read_staypoints_csv(spts_file, tz="utc", index_col="id") + stps_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") + stps = ti.read_staypoints_csv(stps_file, tz="utc", index_col="id") # cluster staypoints to locations - _, locs = spts.as_staypoints.generate_locations( + _, locs = stps.as_staypoints.generate_locations( method="dbscan", epsilon=10, num_samples=0, distance_matrix_metric="haversine", agg_level="dataset" ) @@ -29,26 +29,26 @@ class TestSpatial_filter: def test_filter_staypoints(self): """Test if spatial_filter works for staypoints.""" # read staypoints and area file - spts_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") - spts = ti.read_staypoints_csv(spts_file, tz="utc", index_col="id") + stps_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") + stps = ti.read_staypoints_csv(stps_file, tz="utc", index_col="id") extent = gpd.read_file(os.path.join("tests", "data", "area", "tsinghua.geojson")) # the projection needs to be defined: WGS84 - spts.crs = "epsg:4326" - within_spts = spts.as_staypoints.spatial_filter(areas=extent, method="within", re_project=True) - intersects_spts = spts.as_staypoints.spatial_filter(areas=extent, method="intersects", re_project=True) - crosses_spts = spts.as_staypoints.spatial_filter(areas=extent, method="crosses", re_project=True) + stps.crs = "epsg:4326" + within_stps = stps.as_staypoints.spatial_filter(areas=extent, method="within", re_project=True) + intersects_stps = stps.as_staypoints.spatial_filter(areas=extent, method="intersects", re_project=True) + crosses_stps = stps.as_staypoints.spatial_filter(areas=extent, method="crosses", re_project=True) # the result obtained from ArcGIS gis_within_num = 13 - assert len(within_spts) == gis_within_num, ( + assert len(within_stps) == gis_within_num, ( "The spatial filtered sp number should be the same as" + "the one from the result with ArcGIS" ) - assert len(crosses_spts) == 0, "There will be no point crossing area" + assert len(crosses_stps) == 0, "There will be no point crossing area" # For staypoints the result of within and intersects should be the same - assert_geodataframe_equal(within_spts, intersects_spts, check_less_precise=True) + assert_geodataframe_equal(within_stps, intersects_stps, check_less_precise=True) def test_filter_triplegs(self): """Test if spatial_filter works for triplegs.""" diff --git a/tests/preprocessing/test_positionfixes.py b/tests/preprocessing/test_positionfixes.py index aca1017a..b4664cad 100644 --- a/tests/preprocessing/test_positionfixes.py +++ b/tests/preprocessing/test_positionfixes.py @@ -257,7 +257,7 @@ def test_temporal(self, geolife_pfs_stps_long): assert (pfs["diff"] > gap_threshold).all() def test_stps_tpls_overlap(self, geolife_pfs_stps_long): - """Tpls and spts should not overlap when generated using the default extract triplegs method.""" + """Tpls and stps should not overlap when generated using the default extract triplegs method.""" pfs, stps = geolife_pfs_stps_long pfs, tpls = pfs.as_positionfixes.generate_triplegs(stps) diff --git a/tests/preprocessing/test_staypoints.py b/tests/preprocessing/test_staypoints.py index ff3501e3..bab408a9 100644 --- a/tests/preprocessing/test_staypoints.py +++ b/tests/preprocessing/test_staypoints.py @@ -177,12 +177,12 @@ class TestCreate_activity_flag: """Tests for create_activity_flag() method.""" def test_create_activity_flag(self): - spts_file = os.path.join("tests", "data", "geolife", 
"geolife_staypoints.csv") - spts_test = ti.read_staypoints_csv(spts_file, tz="utc", index_col="id") + stps_file = os.path.join("tests", "data", "geolife", "geolife_staypoints.csv") + stps_test = ti.read_staypoints_csv(stps_file, tz="utc", index_col="id") - activity_true = spts_test["activity"].copy() - spts_test["activity"] = False + activity_true = stps_test["activity"].copy() + stps_test["activity"] = False - spts_test = spts_test.as_staypoints.create_activity_flag() + stps_test = stps_test.as_staypoints.create_activity_flag() - pd.testing.assert_series_equal(spts_test["activity"], activity_true) + pd.testing.assert_series_equal(stps_test["activity"], activity_true) diff --git a/tests/preprocessing/test_triplegs.py b/tests/preprocessing/test_triplegs.py index e5e76430..76d793ab 100644 --- a/tests/preprocessing/test_triplegs.py +++ b/tests/preprocessing/test_triplegs.py @@ -46,7 +46,7 @@ def test_generate_trips(self): pd.testing.assert_frame_equal(trips_loaded, trips) def test_generate_trips_missing_link(self): - """Test nan is assigned for missing link between spts and trips, and tpls and trips.""" + """Test nan is assigned for missing link between stps and trips, and tpls and trips.""" # create trips from geolife (based on positionfixes) pfs, _ = ti.io.dataset_reader.read_geolife(os.path.join("tests", "data", "geolife_long")) pfs, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5) @@ -146,13 +146,13 @@ def test_generate_trips_gap_detection(self): # generate trips and a joint staypoint/triplegs dataframe stps_proc, tpls_proc, trips = ti.preprocessing.triplegs.generate_trips(stps_in, tpls_in, gap_threshold=15) - spts_tpls = _create_debug_spts_tpls_data(stps_proc, tpls_proc, gap_threshold=gap_threshold) + stps_tpls = _create_debug_stps_tpls_data(stps_proc, tpls_proc, gap_threshold=gap_threshold) # test if generated trips are equal pd.testing.assert_frame_equal(trips_loaded, trips) # test if generated staypoints/triplegs are equal (especially important for trip ids) - pd.testing.assert_frame_equal(stps_tpls_loaded, spts_tpls, check_dtype=False) + pd.testing.assert_frame_equal(stps_tpls_loaded, stps_tpls, check_dtype=False) def test_generate_trips_id_management(self): """Test if we can generate the example trips based on example data.""" @@ -170,13 +170,13 @@ def test_generate_trips_id_management(self): # generate trips and a joint staypoint/triplegs dataframe gap_threshold = 15 stps, tpls, _ = ti.preprocessing.triplegs.generate_trips(stps, tpls, gap_threshold=gap_threshold) - spts_tpls = _create_debug_spts_tpls_data(stps, tpls, gap_threshold=gap_threshold) + stps_tpls = _create_debug_stps_tpls_data(stps, tpls, gap_threshold=gap_threshold) # test if generated staypoints/triplegs are equal (especially important for trip ids) - pd.testing.assert_frame_equal(stps_tpls_loaded, spts_tpls, check_dtype=False) + pd.testing.assert_frame_equal(stps_tpls_loaded, stps_tpls, check_dtype=False) -def _create_debug_spts_tpls_data(stps, tpls, gap_threshold): +def _create_debug_stps_tpls_data(stps, tpls, gap_threshold): """Preprocess stps and tpls for "test_generate_trips_*.""" # create table with relevant information from triplegs and staypoints. 
tpls["type"] = "tripleg" diff --git a/tests/visualization/test_modal_split.py b/tests/visualization/test_modal_split.py index 6db33412..9d8fdcd4 100644 --- a/tests/visualization/test_modal_split.py +++ b/tests/visualization/test_modal_split.py @@ -14,8 +14,8 @@ def get_geolife_triplegs_with_modes(): """Get modal split for a small part of the geolife dataset.""" pfs, labels = read_geolife(os.path.join("tests", "data", "geolife_modes")) - pfs, spts = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5) - _, tpls = pfs.as_positionfixes.generate_triplegs(spts, method="between_staypoints") + pfs, stps = pfs.as_positionfixes.generate_staypoints(method="sliding", dist_threshold=25, time_threshold=5) + _, tpls = pfs.as_positionfixes.generate_triplegs(stps, method="between_staypoints") tpls_with_modes = geolife_add_modes_to_triplegs(tpls, labels) return tpls_with_modes diff --git a/trackintel/analysis/tracking_quality.py b/trackintel/analysis/tracking_quality.py index b9907952..c68ba0ac 100644 --- a/trackintel/analysis/tracking_quality.py +++ b/trackintel/analysis/tracking_quality.py @@ -49,7 +49,7 @@ def temporal_tracking_quality(source, granularity="all"): >>> # calculate overall tracking quality of stps >>> temporal_tracking_quality(stps, granularity="all") >>> # calculate per-day tracking quality of stps and tpls sequence - >>> temporal_tracking_quality(spts_tpls, granularity="day") + >>> temporal_tracking_quality(stps_tpls, granularity="day") """ required_columns = ["user_id", "started_at", "finished_at"] if any([c not in source.columns for c in required_columns]): diff --git a/trackintel/preprocessing/filter.py b/trackintel/preprocessing/filter.py index 599ccf76..afb78183 100644 --- a/trackintel/preprocessing/filter.py +++ b/trackintel/preprocessing/filter.py @@ -36,7 +36,7 @@ def spatial_filter(source, areas, method="within", re_project=False): Examples -------- - >>> spts.as_staypoints.spatial_filter(areas, method="within", re_project=False) + >>> stps.as_staypoints.spatial_filter(areas, method="within", re_project=False) """ gdf = source.copy() diff --git a/trackintel/preprocessing/positionfixes.py b/trackintel/preprocessing/positionfixes.py index 11e8e277..3ab66cfd 100644 --- a/trackintel/preprocessing/positionfixes.py +++ b/trackintel/preprocessing/positionfixes.py @@ -89,9 +89,9 @@ def generate_staypoints( geo_col = pfs.geometry.name if elevation_flag: - spts_column = ["user_id", "started_at", "finished_at", "elevation", geo_col] + stps_column = ["user_id", "started_at", "finished_at", "elevation", geo_col] else: - spts_column = ["user_id", "started_at", "finished_at", geo_col] + stps_column = ["user_id", "started_at", "finished_at", geo_col] # TODO: tests using a different distance function, e.g., L2 distance if method == "sliding": @@ -131,7 +131,7 @@ def generate_staypoints( stps["id"] = np.arange(len(stps)) stps.set_index("id", inplace=True) - # Assign staypoint_id to ret_pfs if spts is detected + # Assign staypoint_id to ret_pfs if stps is detected if not stps.empty: stps2pfs_map = stps[["pfs_id"]].to_dict()["pfs_id"] @@ -149,9 +149,9 @@ def generate_staypoints( pfs["staypoint_id"] = np.nan pfs = gpd.GeoDataFrame(pfs, geometry=geo_col, crs=pfs.crs) - stps = gpd.GeoDataFrame(stps, columns=spts_column, geometry=geo_col, crs=pfs.crs) + stps = gpd.GeoDataFrame(stps, columns=stps_column, geometry=geo_col, crs=pfs.crs) # rearange column order - stps = stps[spts_column] + stps = stps[stps_column] ## dtype consistency # stps id (generated by this 
function) should be int64 @@ -159,7 +159,7 @@ def generate_staypoints( # ret_pfs['staypoint_id'] should be Int64 (missing values) pfs["staypoint_id"] = pfs["staypoint_id"].astype("Int64") - # user_id of spts should be the same as ret_pfs + # user_id of stps should be the same as ret_pfs stps["user_id"] = stps["user_id"].astype(pfs["user_id"].dtype) return pfs, stps @@ -234,14 +234,14 @@ def generate_triplegs(pfs_input, stps_input, method="between_staypoints", gap_th insert_index_ls = [] pfs["staypoint_id"] = pd.NA for user_id_this in pfs["user_id"].unique(): - spts_user = stps_input[stps_input["user_id"] == user_id_this] + stps_user = stps_input[stps_input["user_id"] == user_id_this] pfs_user = pfs[pfs["user_id"] == user_id_this] # step 1 # All positionfixes with timestamp between staypoints are assigned the value 0 # Intersect all positionfixes of a user with all staypoints of the same user intervals = pd.IntervalIndex.from_arrays( - spts_user["started_at"], spts_user["finished_at"], closed="left" + stps_user["started_at"], stps_user["finished_at"], closed="left" ) is_in_interval = pfs_user["tracked_at"].apply(lambda x: intervals.contains(x).any()).astype("bool") pfs.loc[is_in_interval[is_in_interval].index, "staypoint_id"] = 0 @@ -250,7 +250,7 @@ def generate_triplegs(pfs_input, stps_input, method="between_staypoints", gap_th # Identify first positionfix after a staypoint # find index of closest positionfix with equal or greater timestamp. tracked_at_sorted = pfs_user["tracked_at"].sort_values() - insert_position_user = tracked_at_sorted.searchsorted(spts_user["finished_at"]) + insert_position_user = tracked_at_sorted.searchsorted(stps_user["finished_at"]) insert_index_user = tracked_at_sorted.iloc[insert_position_user].index # store the insert insert_position_user in an array @@ -390,7 +390,7 @@ def _generate_staypoints_sliding_user( pfs = df.to_dict("records") idx = df.index.to_list() - ret_spts = [] + ret_stps = [] start = 0 # as start begin from 0, curr begin from 1 @@ -409,7 +409,7 @@ def _generate_staypoints_sliding_user( if (delta_t >= (time_threshold * 60)) and (gap_t < gap_threshold * 60): new_stps = __create_new_staypoints(start, curr, pfs, idx, elevation_flag, geo_col) # add staypoint - ret_spts.append(new_stps) + ret_stps.append(new_stps) # distance larger but time too short -> not a stay point # also initializer when new stp is added @@ -423,11 +423,11 @@ def _generate_staypoints_sliding_user( new_stps = __create_new_staypoints(start, curr, pfs, idx, elevation_flag, geo_col, last_flag=True) # add staypoint - ret_spts.append(new_stps) + ret_stps.append(new_stps) - ret_spts = pd.DataFrame(ret_spts) - ret_spts["user_id"] = df["user_id"].unique()[0] - return ret_spts + ret_stps = pd.DataFrame(ret_stps) + ret_stps["user_id"] = df["user_id"].unique()[0] + return ret_stps def __create_new_staypoints(start, end, pfs, idx, elevation_flag, geo_col, last_flag=False): diff --git a/trackintel/preprocessing/staypoints.py b/trackintel/preprocessing/staypoints.py index d8390d4c..10dbf0a3 100644 --- a/trackintel/preprocessing/staypoints.py +++ b/trackintel/preprocessing/staypoints.py @@ -51,7 +51,7 @@ def generate_locations( Examples -------- - >>> spts.as_staypoints.generate_locations(method='dbscan', epsilon=100, num_samples=1) + >>> stps.as_staypoints.generate_locations(method='dbscan', epsilon=100, num_samples=1) """ if agg_level not in ["user", "dataset"]: raise AttributeError("The parameter agg_level must be one of ['user', 'dataset'].") @@ -166,7 +166,7 @@ def generate_locations( ## 
dtype consistency # locs id (generated by this function) should be int64 ret_loc.index = ret_loc.index.astype("int64") - # location_id of spts can only be in Int64 (missing values) + # location_id of stps can only be in Int64 (missing values) ret_stps["location_id"] = ret_stps["location_id"].astype("Int64") # user_id of ret_loc should be the same as ret_stps ret_loc["user_id"] = ret_loc["user_id"].astype(ret_stps["user_id"].dtype) @@ -200,8 +200,8 @@ def create_activity_flag(staypoints, method="time_threshold", time_threshold=5.0 Examples -------- - >>> spts = spts.as_staypoints.create_activity_flag(method='time_threshold', time_threshold=5) - >>> print(spts['activity']) + >>> stps = stps.as_staypoints.create_activity_flag(method='time_threshold', time_threshold=5) + >>> print(stps['activity']) """ if method == "time_threshold": staypoints[activity_column_name] = staypoints["finished_at"] - staypoints["started_at"] > datetime.timedelta( diff --git a/trackintel/preprocessing/triplegs.py b/trackintel/preprocessing/triplegs.py index a6a2f886..066c95b9 100644 --- a/trackintel/preprocessing/triplegs.py +++ b/trackintel/preprocessing/triplegs.py @@ -9,22 +9,22 @@ def smoothen_triplegs(triplegs, tolerance=1.0, preserve_topology=True): """ Reduce number of points while retaining structure of tripleg. - + A wrapper function using shapely.simplify(): https://shapely.readthedocs.io/en/stable/manual.html#object.simplify - + Parameters ---------- triplegs: GeoDataFrame (as trackintel triplegs) triplegs to be simplified - + tolerance: float, default 1.0 - a higher tolerance removes more points; the units of tolerance are the same as the + a higher tolerance removes more points; the units of tolerance are the same as the projection of the input geometry - + preserve_topology: bool, default True whether to preserve topology. If set to False the Douglas-Peucker algorithm is used. - + Returns ------- ret_tpls: GeoDataFrame (as trackintel triplegs) @@ -89,36 +89,36 @@ def generate_trips(stps_input, tpls_input, gap_threshold=15, print_progress=Fals # we copy the input because we need to add a temporary column tpls = tpls_input.copy() - spts = stps_input.copy() + stps = stps_input.copy() tpls["type"] = "tripleg" - spts["type"] = "staypoint" + stps["type"] = "staypoint" # create table with relevant information from triplegs and staypoints. 
- spts_tpls = spts[["started_at", "finished_at", "user_id", "type", "activity"]].append( + stps_tpls = stps[["started_at", "finished_at", "user_id", "type", "activity"]].append( tpls[["started_at", "finished_at", "user_id", "type"]] ) # create ID field from index - spts_tpls["id"] = spts_tpls.index + stps_tpls["id"] = stps_tpls.index # transform nan to bool - spts_tpls["activity"] = spts_tpls["activity"] == True + stps_tpls["activity"] = stps_tpls["activity"] == True - spts_tpls.sort_values(by=["user_id", "started_at"], inplace=True) - spts_tpls["started_at_next"] = spts_tpls["started_at"].shift(-1) - spts_tpls["activity_next"] = spts_tpls["activity"].shift(-1) + stps_tpls.sort_values(by=["user_id", "started_at"], inplace=True) + stps_tpls["started_at_next"] = stps_tpls["started_at"].shift(-1) + stps_tpls["activity_next"] = stps_tpls["activity"].shift(-1) if print_progress: tqdm.pandas(desc="User trip generation") trips = ( - spts_tpls.groupby(["user_id"], group_keys=False, as_index=False) + stps_tpls.groupby(["user_id"], group_keys=False, as_index=False) .progress_apply(_generate_trips_user, gap_threshold=gap_threshold) .reset_index(drop=True) ) else: trips = ( - spts_tpls.groupby(["user_id"], group_keys=False, as_index=False) + stps_tpls.groupby(["user_id"], group_keys=False, as_index=False) .apply(_generate_trips_user, gap_threshold=gap_threshold) .reset_index(drop=True) ) @@ -136,47 +136,47 @@ def generate_trips(stps_input, tpls_input, gap_threshold=15, print_progress=Fals temp = pd.DataFrame(ls, columns=[tpls.index.name, "trip_id"]).set_index(tpls.index.name) tpls = tpls.join(temp, how="left") - # assign trip_id to spts, for non-activity spts - trip2spt_map = trips[["spts"]].to_dict()["spts"] + # assign trip_id to stps, for non-activity stps + trip2spt_map = trips[["stps"]].to_dict()["stps"] ls = [] for key, values in trip2spt_map.items(): for value in values: ls.append([value, key]) - temp = pd.DataFrame(ls, columns=[spts.index.name, "trip_id"]).set_index(spts.index.name) - spts = spts.join(temp, how="left") + temp = pd.DataFrame(ls, columns=[stps.index.name, "trip_id"]).set_index(stps.index.name) + stps = stps.join(temp, how="left") - # assign prev_trip_id to spts + # assign prev_trip_id to stps temp = trips[["destination_staypoint_id"]].copy() - temp.rename(columns={"destination_staypoint_id": spts.index.name}, inplace=True) + temp.rename(columns={"destination_staypoint_id": stps.index.name}, inplace=True) temp.index.name = "prev_trip_id" - temp = temp.reset_index().set_index(spts.index.name) - spts = spts.join(temp, how="left") + temp = temp.reset_index().set_index(stps.index.name) + stps = stps.join(temp, how="left") - # assign next_trip_id to spts + # assign next_trip_id to stps temp = trips[["origin_staypoint_id"]].copy() - temp.rename(columns={"origin_staypoint_id": spts.index.name}, inplace=True) + temp.rename(columns={"origin_staypoint_id": stps.index.name}, inplace=True) temp.index.name = "next_trip_id" - temp = temp.reset_index().set_index(spts.index.name) - spts = spts.join(temp, how="left") + temp = temp.reset_index().set_index(stps.index.name) + stps = stps.join(temp, how="left") # final cleaning tpls.drop(columns=["type"], inplace=True) - spts.drop(columns=["type"], inplace=True) - trips.drop(columns=["tpls", "spts"], inplace=True) + stps.drop(columns=["type"], inplace=True) + trips.drop(columns=["tpls", "stps"], inplace=True) ## dtype consistency # trips id (generated by this function) should be int64 trips.index = trips.index.astype("int64") - # trip id of spts and 
tpls can only be in Int64 (missing values)
-    spts["trip_id"] = spts["trip_id"].astype("Int64")
-    spts["prev_trip_id"] = spts["prev_trip_id"].astype("Int64")
-    spts["next_trip_id"] = spts["next_trip_id"].astype("Int64")
+    # trip id of stps and tpls can only be in Int64 (missing values)
+    stps["trip_id"] = stps["trip_id"].astype("Int64")
+    stps["prev_trip_id"] = stps["prev_trip_id"].astype("Int64")
+    stps["next_trip_id"] = stps["next_trip_id"].astype("Int64")
     tpls["trip_id"] = tpls["trip_id"].astype("Int64")

     # user_id of trips should be the same as tpls
     trips["user_id"] = trips["user_id"].astype(tpls["user_id"].dtype)

-    return spts, tpls, trips
+    return stps, tpls, trips


 def _generate_trips_user(df, gap_threshold):
@@ -273,7 +273,13 @@ def _generate_trips_user(df, gap_threshold):
     # if user ends generate last trip with unknown destination
     if (len(temp_trip_stack) > 0) and (_check_trip_stack_has_tripleg(temp_trip_stack)):
         destination_activity = unknown_activity
-        trip_ls.append(_create_trip_from_stack(temp_trip_stack, origin_activity, destination_activity,))
+        trip_ls.append(
+            _create_trip_from_stack(
+                temp_trip_stack,
+                origin_activity,
+                destination_activity,
+            )
+        )

     # print(trip_ls)
     trips = pd.DataFrame(trip_ls)
@@ -283,11 +289,11 @@ def _check_trip_stack_has_tripleg(temp_trip_stack):
     """
     Check if a trip has at least 1 tripleg.
-
+
     Parameters
     ----------
     temp_trip_stack : list
-        list of dictionary like elements (either pandas series or python dictionary).
+        list of dictionary-like elements (either pandas series or python dictionary).
         Contains all elements that will be aggregated into a trip

     Returns
@@ -310,12 +316,12 @@ def _create_trip_from_stack(temp_trip_stack, origin_activity, destination_activi
     Parameters
     ----------
     temp_trip_stack : list
-        list of dictionary like elements (either pandas series or python dictionary).
+        list of dictionary-like elements (either pandas series or python dictionary).
         Contains all elements that will be aggregated into a trip
-
+
     origin_activity : dictionary-like
         Either dictionary or pandas series
-
+
     destination_activity : dictionary-like
         Either dictionary or pandas series

     Returns
@@ -343,7 +349,7 @@
         "origin_staypoint_id": origin_activity["id"],
         "destination_staypoint_id": destination_activity["id"],
         "tpls": [tripleg["id"] for tripleg in temp_trip_stack if tripleg["type"] == "tripleg"],
-        "spts": [tripleg["id"] for tripleg in temp_trip_stack if tripleg["type"] == "staypoint"],
+        "stps": [tripleg["id"] for tripleg in temp_trip_stack if tripleg["type"] == "staypoint"],
     }

     return trip_dict_entry
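The dtype-consistency blocks in generate_trips (and in generate_staypoints/generate_locations earlier in this series) cast id columns to pandas' nullable "Int64" rather than plain int64. A minimal sketch of why, using only pandas:

import pandas as pd

# With plain int64, a missing value forces an upcast to float64, so ids
# like 3 silently become 3.0. The nullable "Int64" extension dtype keeps
# the column integer-typed and represents gaps as pd.NA instead, which is
# why trip_id, prev_trip_id and next_trip_id are cast to "Int64" above.
float_ids = pd.Series([1, None, 3])               # dtype: float64
int_ids = pd.Series([1, None, 3], dtype="Int64")  # dtype: Int64

print(float_ids.dtype, int_ids.dtype)  # float64 Int64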
From 49fe10aa3452ecb2bb7d7372c19111aaf02bb0be Mon Sep 17 00:00:00 2001
From: Ye Hong
Date: Sun, 4 Apr 2021 18:07:58 +0200
Subject: [PATCH 3/3] CLN: black format

---
 docs/conf.py                                  |  10 +-
 .../test_transport_mode_identification.py     |   4 +-
 tests/visualization/test_locations.py         |   2 +-
 tests/visualization/test_positionfixes.py     |   2 +-
 tests/visualization/test_staypoints.py        |   2 +-
 tests/visualization/test_triplegs.py          |   2 +-
 trackintel/analysis/tracking_quality.py       |   8 +-
 trackintel/geogr/distances.py                 |  36 +++---
 trackintel/geogr/point_distances.py           |   2 +-
 trackintel/io/dataset_reader.py               |  14 +--
 trackintel/io/file.py                         | 114 +++++++++---------
 trackintel/io/from_geopandas.py               |  90 +++++++-------
 trackintel/io/postgis.py                      |  58 ++++-----
 trackintel/model/locations.py                 |  16 +--
 trackintel/model/positionfixes.py             |  16 +--
 trackintel/model/staypoints.py                |  20 +--
 trackintel/model/tours.py                     |   6 +-
 trackintel/model/triplegs.py                  |  18 +--
 trackintel/model/trips.py                     |   8 +-
 trackintel/model/users.py                     |   6 +-
 trackintel/preprocessing/positionfixes.py     |  56 ++++-----
 trackintel/visualization/osm.py               |   4 +-
 22 files changed, 251 insertions(+), 243 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 77354f3a..12f58ba3 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -150,7 +150,15 @@ def setup(app):
 # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
 # 'searchbox.html']``.
 #
-html_sidebars = {"**": ["about.html", "navigation.html", "relations.html", "searchbox.html", "donate.html",]}
+html_sidebars = {
+    "**": [
+        "about.html",
+        "navigation.html",
+        "relations.html",
+        "searchbox.html",
+        "donate.html",
+    ]
+}

 # only defined in 'Alabaster' html_theme
 # html_theme_options = {
diff --git a/tests/analysis/test_transport_mode_identification.py b/tests/analysis/test_transport_mode_identification.py
index 6c0029c4..0e6234c2 100644
--- a/tests/analysis/test_transport_mode_identification.py
+++ b/tests/analysis/test_transport_mode_identification.py
@@ -10,7 +10,7 @@
 class TestTransportModeIdentification:
     def test_check_empty_dataframe(self):
-        """Assert that the method does not work for empty DataFrames
+        """Assert that the method does not work for empty DataFrames
         (but that the rest works fine, e.g., method signature).
         """
         tpls_file = os.path.join("tests", "data", "triplegs_transport_mode_identification.csv")
@@ -20,7 +20,7 @@ def test_check_empty_dataframe(self):
             empty_frame.as_triplegs.predict_transport_mode(method="simple-coarse")

     def test_simple_coarse_identification_no_crs(self):
-        """Assert that the simple-coarse transport mode identification throws the correct
+        """Assert that the simple-coarse transport mode identification throws the correct
         warning and yields the correct results for WGS84.
""" tpls_file = os.path.join("tests", "data", "triplegs_transport_mode_identification.csv") diff --git a/tests/visualization/test_locations.py b/tests/visualization/test_locations.py index 1fd9831b..92b154ee 100644 --- a/tests/visualization/test_locations.py +++ b/tests/visualization/test_locations.py @@ -57,4 +57,4 @@ def test_parameter(self, test_data): # plot only location locs.as_locations.plot(out_filename=tmp_file, plot_osm=True) assert os.path.exists(tmp_file) - os.remove(tmp_file) \ No newline at end of file + os.remove(tmp_file) diff --git a/tests/visualization/test_positionfixes.py b/tests/visualization/test_positionfixes.py index 85b93edd..5847cf5c 100644 --- a/tests/visualization/test_positionfixes.py +++ b/tests/visualization/test_positionfixes.py @@ -17,4 +17,4 @@ def test_positionfixes_plot(self): pfs = ti.read_positionfixes_csv(pfs_file, sep=";", index_col="id", crs="EPSG:4326") pfs.as_positionfixes.plot(out_filename=tmp_file, plot_osm=False) assert os.path.exists(tmp_file) - os.remove(tmp_file) \ No newline at end of file + os.remove(tmp_file) diff --git a/tests/visualization/test_staypoints.py b/tests/visualization/test_staypoints.py index c5b09e11..e30dbf5f 100644 --- a/tests/visualization/test_staypoints.py +++ b/tests/visualization/test_staypoints.py @@ -43,4 +43,4 @@ def test_parameter(self, test_data): # with osm stps.as_staypoints.plot(out_filename="staypoints_plot", plot_osm=True) assert os.path.exists("staypoints_plot.png") - os.remove("staypoints_plot.png") \ No newline at end of file + os.remove("staypoints_plot.png") diff --git a/tests/visualization/test_triplegs.py b/tests/visualization/test_triplegs.py index a327ae1e..4747e556 100644 --- a/tests/visualization/test_triplegs.py +++ b/tests/visualization/test_triplegs.py @@ -49,4 +49,4 @@ def test_parameter(self, test_data): # test plot_osm tpls.as_triplegs.plot(out_filename=tmp_file, plot_osm=True) assert os.path.exists(tmp_file) - os.remove(tmp_file) \ No newline at end of file + os.remove(tmp_file) diff --git a/trackintel/analysis/tracking_quality.py b/trackintel/analysis/tracking_quality.py index c68ba0ac..d70f783b 100644 --- a/trackintel/analysis/tracking_quality.py +++ b/trackintel/analysis/tracking_quality.py @@ -16,7 +16,7 @@ def temporal_tracking_quality(source, granularity="all"): granularity : {"all", "day", "week", "weekday", "hour"} The level of which the tracking quality is calculated. The default "all" returns the overall tracking quality; "day" the tracking quality by days; "week" the quality - by weeks; "weekday" the quality by day of the week (e.g, Mondays, Tuesdays, etc.) and + by weeks; "weekday" the quality by day of the week (e.g, Mondays, Tuesdays, etc.) and "hour" the quality by hours. Returns @@ -28,11 +28,11 @@ def temporal_tracking_quality(source, granularity="all"): ----- Requires at least the following columns: ``['user_id', 'started_at', 'finished_at']`` - which means the function supports trackintel ``staypoints``, ``triplegs``, ``trips`` and ``tours`` + which means the function supports trackintel ``staypoints``, ``triplegs``, ``trips`` and ``tours`` datamodels and their combinations (e.g., staypoints and triplegs sequence). - + The temporal tracking quality is the ratio of tracking time and the total time extent. It is - calculated and returned per-user in the defined ``granularity``. The possible time extents of + calculated and returned per-user in the defined ``granularity``. 
The possible time extents of the different granularities are different: - ``all`` considers the time between the latest "finished_at" and the earliest "started_at"; diff --git a/trackintel/geogr/distances.py b/trackintel/geogr/distances.py index aa74d327..53d67033 100644 --- a/trackintel/geogr/distances.py +++ b/trackintel/geogr/distances.py @@ -15,7 +15,7 @@ def calculate_distance_matrix(X, Y=None, dist_metric="haversine", n_jobs=0, **kwds): """ Calculate a distance matrix based on a specific distance metric. - + If only X is given, the pair-wise distances between all elements in X are calculated. If X and Y are given, the distances between all combinations of X and Y are calculated. Distances between elements of X and X, and distances between elements of Y and Y are not calculated. @@ -23,38 +23,38 @@ def calculate_distance_matrix(X, Y=None, dist_metric="haversine", n_jobs=0, **kw Parameters ---------- X : GeoDataFrame (as trackintel staypoints or triplegs) - + Y : GeoDataFrame (as trackintel staypoints or triplegs), optional - + dist_metric: {'haversine', 'euclidean', 'dtw', 'frechet'} - The distance metric to be used for calculating the matrix. - - For staypoints, a common choice is 'haversine' or 'euclidean'. This function wraps around - the ``pairwise_distances`` function from scikit-learn if only `X` is given and wraps around the - ``scipy.spatial.distance.cdist`` function if X and Y are given. + The distance metric to be used for calculating the matrix. + + For staypoints, a common choice is 'haversine' or 'euclidean'. This function wraps around + the ``pairwise_distances`` function from scikit-learn if only `X` is given and wraps around the + ``scipy.spatial.distance.cdist`` function if X and Y are given. Therefore the following metrics are also accepted: - + via ``scikit-learn``: `[‘cityblock’, ‘cosine’, ‘euclidean’, ‘l1’, ‘l2’, ‘manhattan’]` - + via ``scipy.spatial.distance``: `[‘braycurtis’, ‘canberra’, ‘chebyshev’, ‘correlation’, ‘dice’, ‘hamming’, ‘jaccard’, ‘kulsinski’, ‘mahalanobis’, ‘minkowski’, ‘rogerstanimoto’, ‘russellrao’, ‘seuclidean’, ‘sokalmichener’, ‘sokalsneath’, ‘sqeuclidean’, ‘yule’]` - - For triplegs, a common choice is 'dtw' or 'frechet'. This function uses the implementation + + For triplegs, a common choice is 'dtw' or 'frechet'. This function uses the implementation from similaritymeasures. - + n_jobs: int - Number of cores to use: 'dtw', 'frechet' and all distance metrics from `pairwise_distances` (only available + Number of cores to use: 'dtw', 'frechet' and all distance metrics from `pairwise_distances` (only available if only X is given) are parallelized. - - **kwds: + + **kwds: optional keywords passed to the distance functions. Returns ------- D: np.array matrix of shape (len(X), len(X)) or of shape (len(X), len(Y)) if Y is provided. - + """ geom_type = X.geometry.iat[0].geom_type if Y is None: @@ -171,7 +171,7 @@ def meters_to_decimal_degrees(meters, latitude): The meters to convert to degrees. latitude : float - As the conversion is dependent (approximately) on the latitude where + As the conversion is dependent (approximately) on the latitude where the conversion happens, this needs to be specified. Use 0 for the equator.
Returns diff --git a/trackintel/geogr/point_distances.py b/trackintel/geogr/point_distances.py index cc78167e..3c57dd4a 100644 --- a/trackintel/geogr/point_distances.py +++ b/trackintel/geogr/point_distances.py @@ -4,7 +4,7 @@ def haversine_dist(lon_1, lat_1, lon_2, lat_2, r=6371000): """ Compute the great circle or haversine distance between two coordinates in WGS84. - + Serialized version of the haversine distance. Parameters diff --git a/trackintel/io/dataset_reader.py b/trackintel/io/dataset_reader.py index 95fc1976..201c9584 100644 --- a/trackintel/io/dataset_reader.py +++ b/trackintel/io/dataset_reader.py @@ -32,7 +32,7 @@ def read_geolife(geolife_path): ------- gdf: GeoDataFrame (as trackintel positionfixes) Contains all loaded geolife positionfixes - + labels: dict Dictionary with the available (optional) mode labels. @@ -171,16 +171,16 @@ def geolife_add_modes_to_triplegs( ---------- tpls_in : GeoDataFrame (as trackintel triplegs) Geolife triplegs. - + labels : dictionary Geolife labels as provided by the trackintel `read_geolife` function. - + ratio_threshold : float, default 0.5 How much a label needs to overlap a tripleg to assign the label to this tripleg. - + max_triplegs : int, default 20 Number of neighbors that are considered in the search for matching triplegs. - + max_duration_tripleg : float, default 7 * 24 * 60 * 60 (seconds) Used for a primary filter. All triplegs that are further away in time than 'max_duration_tripleg' from a label won't be considered for matching. @@ -260,10 +260,10 @@ def _calc_overlap_for_candidates(candidates, tpls_this, labels_this, ratio_thres tpls_this : GeoDataFrame (as trackintel triplegs) triplegs of a single user - + labels_this : DataFrame labels of a single user - + ratio_threshold : float, optional How much a label needs to overlap a tripleg to assign the label to this tripleg. diff --git a/trackintel/io/file.py b/trackintel/io/file.py index 36a288d4..e384a209 100644 --- a/trackintel/io/file.py +++ b/trackintel/io/file.py @@ -14,7 +14,7 @@ def read_positionfixes_csv(*args, columns=None, tz=None, index_col=object(), crs=None, **kwargs): """ Read positionfixes from csv file. - + Wraps the pandas read_csv function, extracts longitude and latitude and builds a geopandas GeoDataFrame. This also validates that the ingested data conforms to the trackintel understanding of positionfixes (see :doc:`/modules/model`). Parameters ---------- columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. - + tz : str, optional pytz compatible timezone string. If None UTC is assumed. - + index_col : str, optional column name to be used as index. If None the default index is assumed as unique identifier. - + crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string @@ -46,7 +46,7 @@ def read_positionfixes_csv(*args, columns=None, tz=None, index_col=object(), crs ----- Note that this function is primarily useful if data is available in a longitude/latitude format. If your data already contains a WKT column, - it might be easier to just use the GeoPandas import functions + it might be easier to just use the GeoPandas import functions :func:`trackintel.io.from_geopandas.read_positionfixes_gpd`.
Examples @@ -91,15 +91,15 @@ def read_positionfixes_csv(*args, columns=None, tz=None, index_col=object(), crs def write_positionfixes_csv(positionfixes, filename, *args, **kwargs): """ Write positionfixes to csv file. - - Wraps the pandas to_csv function, but strips the geometry column ('geom') and + + Wraps the pandas to_csv function, but strips the geometry column ('geom') and stores the longitude and latitude in respective columns. Parameters ---------- positionfixes : GeoDataFrame (as trackintel positionfixes) The positionfixes to store to the CSV file. - + filename : str The file to write to. """ @@ -114,7 +114,7 @@ def write_positionfixes_csv(positionfixes, filename, *args, **kwargs): def read_triplegs_csv(*args, columns=None, tz=None, index_col=object(), crs=None, **kwargs): """ Read triplegs from csv file. - + Wraps the pandas read_csv function, extracts a WKT for the leg geometry and builds a geopandas GeoDataFrame. This also validates that the ingested data conforms to the trackintel understanding of triplegs (see :doc:`/modules/model`). @@ -123,14 +123,14 @@ def read_triplegs_csv(*args, columns=None, tz=None, index_col=object(), crs=None ---------- columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. - + tz : str, optional pytz compatible timezone string. If None UTC is assumed. - + index_col : str, optional - column name to be used as index. If None the default index is assumed + column name to be used as index. If None the default index is assumed as unique identifier. - + crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string @@ -140,7 +140,7 @@ def read_triplegs_csv(*args, columns=None, tz=None, index_col=object(), crs=None ------- tpls : GeoDataFrame (as trackintel triplegs) A GeoDataFrame containing the triplegs. - + Examples -------- >>> trackintel.read_triplegs_csv('data.csv') @@ -182,15 +182,15 @@ def read_triplegs_csv(*args, columns=None, tz=None, index_col=object(), crs=None def write_triplegs_csv(triplegs, filename, *args, **kwargs): """ Write triplegs to csv file. - - Wraps the pandas to_csv function, but transforms the geom into WKT + + Wraps the pandas to_csv function, but transforms the geom into WKT before writing. Parameters ---------- triplegs : GeoDataFrame (as trackintel triplegs) The triplegs to store to the CSV file. - + filename : str The file to write to. """ @@ -203,24 +203,24 @@ def write_triplegs_csv(triplegs, filename, *args, **kwargs): def read_staypoints_csv(*args, columns=None, tz=None, index_col=object(), crs=None, **kwargs): """ Read staypoints from csv file. - - Wraps the pandas read_csv function, extracts a WKT for the staypoint - geometry and builds a geopandas GeoDataFrame. This also validates that - the ingested data conforms to the trackintel understanding of staypoints + + Wraps the pandas read_csv function, extracts a WKT for the staypoint + geometry and builds a geopandas GeoDataFrame. This also validates that + the ingested data conforms to the trackintel understanding of staypoints (see :doc:`/modules/model`). - + Parameters ---------- columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. - + tz : str, optional pytz compatible timezone string. If None UTC is assumed. - + index_col : str, optional - column name to be used as index. If None the default index is assumed + column name to be used as index. 
If None the default index is assumed as unique identifier. - + crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string @@ -230,7 +230,7 @@ def read_staypoints_csv(*args, columns=None, tz=None, index_col=object(), crs=No ------- stps : GeoDataFrame (as trackintel staypoints) A GeoDataFrame containing the staypoints. - + Examples -------- >>> trackintel.read_staypoints_csv('data.csv') @@ -272,15 +272,15 @@ def read_staypoints_csv(*args, columns=None, tz=None, index_col=object(), crs=No def write_staypoints_csv(staypoints, filename, *args, **kwargs): """ Write staypoints to csv file. - - Wraps the pandas to_csv function, but transforms the geom into WKT + + Wraps the pandas to_csv function, but transforms the geom into WKT before writing. Parameters ---------- staypoints : GeoDataFrame (as trackintel staypoints) The staypoints to store to the CSV file. - + filename : str The file to write to. """ @@ -293,21 +293,21 @@ def write_staypoints_csv(staypoints, filename, *args, **kwargs): def read_locations_csv(*args, columns=None, index_col=object(), crs=None, **kwargs): """ Read locations from csv file. - - Wraps the pandas read_csv function, extracts a WKT for the location - center (and extent) and builds a geopandas GeoDataFrame. This also - validates that the ingested data conforms to the trackintel understanding + + Wraps the pandas read_csv function, extracts a WKT for the location + center (and extent) and builds a geopandas GeoDataFrame. This also + validates that the ingested data conforms to the trackintel understanding of locations (see :doc:`/modules/model`). Parameters ---------- columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. - + index_col : str, optional column name to be used as index. If None the default index is assumed as unique identifier. - + crs : pyproj.crs or str, optional Set coordinate reference system. The value can be anything accepted by pyproj.CRS.from_user_input(), such as an authority string @@ -352,15 +352,15 @@ def read_locations_csv(*args, columns=None, index_col=object(), crs=None, **kwar def write_locations_csv(locations, filename, *args, **kwargs): """ Write locations to csv file. - - Wraps the pandas to_csv function, but transforms the center (and + + Wraps the pandas to_csv function, but transforms the center (and extent) into WKT before writing. Parameters ---------- locations : GeoDataFrame (as trackintel locations) The locations to store to the CSV file. - + filename : str The file to write to. """ @@ -374,28 +374,28 @@ def write_locations_csv(locations, filename, *args, **kwargs): def read_trips_csv(*args, columns=None, tz=None, index_col=object(), **kwargs): """ Read trips from csv file. - + Wraps the pandas read_csv function and extracts proper datetimes. This also - validates that the ingested data conforms to the trackintel understanding + validates that the ingested data conforms to the trackintel understanding of trips (see :doc:`/modules/model`). - + Parameters ---------- columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. - + tz : str, optional pytz compatible timezone string. If None UTC is assumed. - + index_col : str, optional - column name to be used as index. If None the default index is assumed + column name to be used as index. If None the default index is assumed as unique identifier. 
- + Returns ------- trips : DataFrame (as trackintel trips) A DataFrame containing the trips. - + Examples -------- >>> trackintel.read_trips_csv('data.csv') @@ -430,14 +430,14 @@ def read_trips_csv(*args, columns=None, tz=None, index_col=object(), **kwargs): def write_trips_csv(trips, filename, *args, **kwargs): """ Write trips to csv file. - + Wraps the pandas to_csv function. Parameters ---------- trips : DataFrame (as trackintel trips) The trips to store to the CSV file. - + filename : str The file to write to. """ @@ -448,16 +448,16 @@ def read_tours_csv(*args, columns=None, tz=None, **kwargs): """ Read tours from csv file. - + Wraps the pandas read_csv function and extracts proper datetimes. This also - validates that the ingested data conforms to the trackintel understanding + validates that the ingested data conforms to the trackintel understanding of tours (see :doc:`/modules/model`). - + Parameters ---------- columns : dict, optional The column names to rename in the format {'old_name':'trackintel_standard_name'}. - + tz : str, optional pytz compatible timezone string. If None UTC is assumed. @@ -482,14 +482,14 @@ def write_tours_csv(tours, filename, *args, **kwargs): """ Write tours to csv file. - + Wraps the pandas to_csv function. Parameters ---------- tours : DataFrame (as trackintel tours) The tours to store to the CSV file. - + filename : str The file to write to. """ @@ -505,10 +505,10 @@ def _localize_timestamp(dt_series, pytz_tzinfo, col_name): ---------- dt_series : pandas.Series a pandas datetime series - + pytz_tzinfo : str pytz compatible timezone string. If None UTC will be assumed - + col_name : str Column name for informative warning message diff --git a/trackintel/io/from_geopandas.py b/trackintel/io/from_geopandas.py index 8e28ba72..5cd30bd4 100644 --- a/trackintel/io/from_geopandas.py +++ b/trackintel/io/from_geopandas.py @@ -6,26 +6,26 @@ def read_positionfixes_gpd(gdf, tracked_at="tracked_at", user_id="user_id", geom="geom", tz=None, mapper={}): """ Read positionfixes from GeoDataFrames. - + Wraps the pd.rename function to simplify the import of GeoDataFrames. Parameters ---------- gdf : GeoDataFrame GeoDataFrame with valid point geometry, containing the positionfixes to import - + tracked_at : str, default 'tracked_at' name of the column storing the timestamps. - + user_id : str, default 'user_id' name of the column storing the user_id. - + geom : str, default 'geom' name of the column storing the geometry. - + tz : str, optional pytz compatible timezone string. If None UTC will be assumed - + mapper : dict, optional further columns that should be renamed. @@ -33,7 +33,7 @@ def read_positionfixes_gpd(gdf, tracked_at="tracked_at", user_id="user_id", geom ------- pfs : GeoDataFrame (as trackintel positionfixes) A GeoDataFrame containing the positionfixes. - + Examples -------- >>> trackintel.read_positionfixes_gpd(gdf, user_id='User', geom='geometry', tz='utc') @@ -58,29 +58,29 @@ def read_staypoints_gpd( ): """ Read staypoints from GeoDataFrames. - + Wraps the pd.rename function to simplify the import of GeoDataFrames. Parameters ---------- gdf : GeoDataFrame GeoDataFrame with valid point geometry, containing the staypoints to import - + started_at : str, default 'started_at' name of the column storing the starttime of the staypoints. - + finished_at : str, default 'finished_at' name of the column storing the endtime of the staypoints.
- + user_id : str, default 'user_id' name of the column storing the user_id. - + geom : str, default 'geom' name of the column storing the geometry. - + tz : str, optional pytz compatible timezone string. If None UTC is assumed. - + mapper : dict, optional further columns that should be renamed. @@ -88,7 +88,7 @@ def read_staypoints_gpd( ------- stps : GeoDataFrame (as trackintel staypoints) A GeoDataFrame containing the staypoints - + Examples -------- >>> trackintel.read_staypoints_gpd(gdf, started_at='start_time', finished_at='end_time', tz='utc') @@ -113,29 +113,29 @@ def read_triplegs_gpd( ): """ Read triplegs from GeoDataFrames. - + Wraps the pd.rename function to simplify the import of GeoDataFrames. Parameters ---------- gdf : GeoDataFrame GeoDataFrame with valid line geometry, containing the triplegs to import. - + started_at : str, default 'started_at' name of the column storing the starttime of the triplegs. - + finished_at : str, default 'finished_at' name of the column storing the endtime of the triplegs. - + user_id : str, default 'user_id' name of the column storing the user_id. - + geom : str, default 'geom' name of the column storing the geometry. - + tz : str, optional pytz compatible timezone string. If None UTC is assumed. - + mapper : dict, optional further columns that should be renamed. @@ -143,7 +143,7 @@ def read_triplegs_gpd( ------- tpls : GeoDataFrame (as trackintel triplegs) A GeoDataFrame containing the triplegs - + Examples -------- >>> trackintel.read_triplegs_gpd(gdf, user_id='User', geom='geometry', tz='utc') @@ -175,32 +175,32 @@ def read_trips_gpd( ): """ Read trips from GeoDataFrames/DataFrames. - + Wraps the pd.rename function to simplify the import of GeoDataFrames (DataFrames). Parameters ---------- gdf : GeoDataFrame or DataFrame GeoDataFrame/DataFrame containing the trips to import. - + started_at : str, default 'started_at' name of the column storing the starttime of the trips. - + finished_at : str, default 'finished_at' name of the column storing the endtime of the trips. - + user_id : str, default 'user_id' name of the column storing the user_id. - + origin_staypoint_id : str, default 'origin_staypoint_id' name of the column storing the staypoint_id of the start of the trip - + destination_staypoint_id : str, default 'destination_staypoint_id' name of the column storing the staypoint_id of the end of the trip - + tz : str, optional pytz compatible timezone string. If None UTC is assumed. - + mapper : dict, optional further columns that should be renamed. @@ -208,7 +208,7 @@ def read_trips_gpd( ------- trips : GeoDataFrame/DataFrame (as trackintel trips) A GeoDataFrame/DataFrame containing the trips. - + Examples -------- >>> trackintel.read_trips_gpd(df, tz='utc') @@ -236,23 +236,23 @@ def read_locations_gpd(gdf, user_id="user_id", center="center", mapper={}): """ Read locations from GeoDataFrames. - + Wraps the pd.rename function to simplify the import of GeoDataFrames. Parameters ---------- gdf : GeoDataFrame GeoDataFrame with valid point geometry, containing the locations to import. - + user_id : str, default 'user_id' name of the column storing the user_id. - + center : str, default 'center' name of the column storing the geometry (Center of the location). - + tz : str, optional pytz compatible timezone string. If None UTC is assumed. - + mapper : dict, optional further columns that should be renamed.
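The from_geopandas readers above all follow the same rename-and-validate pattern, so a single minimal sketch covers them. This is an illustrative assumption, not part of the patch: the input GeoDataFrame and its column names ('User', 'time', 'geometry') are hypothetical placeholders; the call itself follows the docstring examples.

import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

import trackintel as ti

# Hypothetical source GeoDataFrame; the column names 'User' and 'time'
# stand in for whatever the raw data uses.
gdf = gpd.GeoDataFrame(
    {"User": [0, 0], "time": pd.to_datetime(["2021-01-01 08:00:00", "2021-01-01 08:05:00"])},
    geometry=[Point(8.54, 47.37), Point(8.55, 47.38)],
    crs="EPSG:4326",
)

# The keyword arguments map the source columns onto the trackintel standard
# names; tz localizes the naive timestamps (UTC is assumed if tz is None).
pfs = ti.read_positionfixes_gpd(gdf, tracked_at="time", user_id="User", geom="geometry", tz="utc")

The staypoint, tripleg, trip, and location variants are called the same way, with `mapper` available for any extra columns that need renaming.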
@@ -260,7 +260,7 @@ def read_locations_gpd(gdf, user_id="user_id", center="center", mapper={}): ------- locs : GeoDataFrame (as trackintel locations) A GeoDataFrame containing the locations. - + Examples -------- >>> trackintel.read_locations_gpd(df, user_id='User', center='geometry') @@ -287,29 +287,29 @@ def read_tours_gpd( ): """ Read tours from GeoDataFrames. - + Wraps the pd.rename function to simplify the import of GeoDataFrames. Parameters ---------- gdf : GeoDataFrame GeoDataFrame with valid point geometry, containing the tours to import. - + user_id : str, default 'user_id' name of the column storing the user_id. - + started_at : str, default 'started_at' name of the column storing the starttime of the tours. - + finished_at : str, default 'finished_at' name of the column storing the endtime of the tours. - + origin_destination_location_id : str, default 'origin_destination_location_id' the name of the column storing the id of the location where the tour starts and ends. - + journey : str, default 'journey' name of the column storing the information (bool) if the tour is a journey. - + mapper : dict, optional further columns that should be renamed. diff --git a/trackintel/io/postgis.py b/trackintel/io/postgis.py index df61ad57..492f5326 100644 --- a/trackintel/io/postgis.py +++ b/trackintel/io/postgis.py @@ -10,9 +10,9 @@ def read_positionfixes_postgis(conn_string, table_name, geom_col="geom", *args, Parameters ---------- conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The table to read the positionfixes from. geom_col : str, default 'geom' The geometry column of the table. *args Further arguments as available in GeoPandas' GeoDataFrame.from_postgis(). - + **kwargs Further arguments as available in GeoPandas' GeoDataFrame.from_postgis(). @@ -43,7 +43,7 @@ def write_positionfixes_postgis( positionfixes, conn_string, table_name, schema=None, sql_chunksize=None, if_exists="replace" ): - """Stores positionfixes to PostGIS. Usually, this is directly called on a positionfixes + """Stores positionfixes to PostGIS. Usually, this is directly called on a positionfixes DataFrame (see example below). **Attention!** This replaces the table if it already exists! @@ -54,9 +54,9 @@ def write_positionfixes_postgis( The positionfixes to store to the database. conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The name of the table to write to. @@ -108,14 +108,14 @@ def read_triplegs_postgis(conn_string, table_name, geom_col="geom", *args, **kwa Parameters ---------- conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The table to read the triplegs from. geom_col : str, default 'geom' - The geometry column of the table. + The geometry column of the table. Returns ------- @@ -135,7 +135,7 @@ def read_triplegs_postgis(conn_string, table_name, geom_col="geom", *args, **kwa def write_triplegs_postgis(triplegs, conn_string, table_name, schema=None, sql_chunksize=None, if_exists="replace"): - """Stores triplegs to PostGIS.
Usually, this is directly called on a triplegs + """Stores triplegs to PostGIS. Usually, this is directly called on a triplegs DataFrame (see example below). **Attention!** This replaces the table if it already exists! @@ -146,9 +146,9 @@ def write_triplegs_postgis(triplegs, conn_string, table_name, schema=None, sql_c The triplegs to store to the database. conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The name of the table to write to. @@ -196,14 +196,14 @@ def read_staypoints_postgis(conn_string, table_name, geom_col="geom", *args, **k Parameters ---------- conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The table to read the staypoints from. geom_col : str, default 'geom' - The geometry column of the table. + The geometry column of the table. Returns ------- @@ -223,7 +223,7 @@ def read_staypoints_postgis(conn_string, table_name, geom_col="geom", *args, **k def write_staypoints_postgis(staypoints, conn_string, table_name, schema=None, sql_chunksize=None, if_exists="replace"): - """Stores staypoints to PostGIS. Usually, this is directly called on a staypoints + """Stores staypoints to PostGIS. Usually, this is directly called on a staypoints DataFrame (see example below). **Attention!** This replaces the table if it already exists! @@ -234,9 +234,9 @@ def write_staypoints_postgis(staypoints, conn_string, table_name, schema=None, s The staypoints to store to the database. conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The name of the table to write to. @@ -289,14 +289,14 @@ def read_locations_postgis(conn_string, table_name, geom_col="geom", *args, **kw Parameters ---------- conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The table to read the locations from. geom_col : str, default 'geom' - The geometry column of the table. + The geometry column of the table. Returns ------- @@ -316,7 +316,7 @@ def read_locations_postgis(conn_string, table_name, geom_col="geom", *args, **kw def write_locations_postgis(locations, conn_string, table_name, schema=None, sql_chunksize=None, if_exists="replace"): - """Stores locations to PostGIS. Usually, this is directly called on a locations + """Stores locations to PostGIS. Usually, this is directly called on a locations GeoDataFrame (see example below). **Attention!** This replaces the table if it already exists! @@ -327,9 +327,9 @@ def write_locations_postgis(locations, conn_string, table_name, schema=None, sql The locations to store to the database. conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The name of the table to write to. 
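Since every read_*_postgis function mirrors a write_*_postgis function, a round-trip sketch may be clearer than the individual signatures. This is an assumed usage, not taken from the patch: the connection string, table name, and CSV path are placeholders, and the call signatures follow the docstrings above.

import trackintel as ti

# Placeholder connection string; not working credentials.
conn_string = "postgresql://username:password@localhost:5432/database"

pfs = ti.read_positionfixes_csv("positionfixes.csv", sep=";", index_col="id", crs="EPSG:4326")

# Write the positionfixes, then read them back. Note that if_exists="replace"
# (the default) drops and recreates the table, as the **Attention!** warnings note.
pfs.as_positionfixes.to_postgis(conn_string, "positionfixes")
pfs_db = ti.io.postgis.read_positionfixes_postgis(conn_string, "positionfixes", geom_col="geom")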
@@ -378,9 +378,9 @@ def read_trips_postgis(conn_string, table_name, *args, **kwargs): Parameters ---------- conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The table to read the trips from. @@ -400,7 +400,7 @@ def read_trips_postgis(conn_string, table_name, *args, **kwargs): def write_trips_postgis(trips, conn_string, table_name, schema=None, sql_chunksize=None, if_exists="replace"): - """Stores trips to PostGIS. Usually, this is directly called on a trips + """Stores trips to PostGIS. Usually, this is directly called on a trips DataFrame (see example below). **Attention!** This replaces the table if it already exists! @@ -411,9 +411,9 @@ def write_trips_postgis(trips, conn_string, table_name, schema=None, sql_chunksi The trips to store to the database. conn_string : str - A connection string to connect to a database, e.g., + A connection string to connect to a database, e.g., ``postgresql://username:password@host:socket/database``. - + table_name : str The name of the table to write to. diff --git a/trackintel/model/locations.py b/trackintel/model/locations.py index c1529384..d6dfeea0 100644 --- a/trackintel/model/locations.py +++ b/trackintel/model/locations.py @@ -9,20 +9,20 @@ @pd.api.extensions.register_dataframe_accessor("as_locations") class LocationsAccessor(object): """A pandas accessor to treat (Geo)DataFrames as collections of locations. - + This will define certain methods and accessors, as well as make sure that the DataFrame adheres to some requirements. - Requires at least the following columns: + Requires at least the following columns: ['user_id', 'center'] For several use cases, the following additional columns are required: ['elevation', 'context', 'extent'] - + Notes ----- `Locations` are spatially aggregated `Staypoints` that a user frequently visits. - + Examples -------- >>> df.as_locations.plot() @@ -51,7 +51,7 @@ def _validate(obj): def plot(self, *args, **kwargs): """ Plot this collection of locations. - + See :func:`trackintel.visualization.locations.plot_center_of_locations`. """ ti.visualization.locations.plot_center_of_locations(self._obj, *args, **kwargs) @@ -59,7 +59,7 @@ def plot(self, *args, **kwargs): def to_csv(self, filename, *args, **kwargs): """ Store this collection of locations as a CSV file. - + See :func:`trackintel.io.file.write_locations_csv`. """ ti.io.file.write_locations_csv(self._obj, filename, *args, **kwargs) @@ -67,7 +67,7 @@ def to_csv(self, filename, *args, **kwargs): def to_postgis(self, conn_string, table_name, schema=None, sql_chunksize=None, if_exists="replace"): """ Store this collection of locations to PostGIS. - + See :func:`trackintel.io.postgis.write_locations_postgis`. """ ti.io.postgis.write_locations_postgis(self._obj, conn_string, table_name, schema, sql_chunksize, if_exists) @@ -75,7 +75,7 @@ def to_postgis(self, conn_string, table_name, schema=None, sql_chunksize=None, i def spatial_filter(self, *args, **kwargs): """ Filter locations with a geo extent. - + See :func:`trackintel.preprocessing.filter.spatial_filter`.
""" return ti.preprocessing.filter.spatial_filter(self._obj, *args, **kwargs) diff --git a/trackintel/model/positionfixes.py b/trackintel/model/positionfixes.py index f2d39731..6fb9d129 100644 --- a/trackintel/model/positionfixes.py +++ b/trackintel/model/positionfixes.py @@ -6,11 +6,11 @@ @pd.api.extensions.register_dataframe_accessor("as_positionfixes") class PositionfixesAccessor(object): """A pandas accessor to treat (Geo)DataFrames as collections of `Positionfixes`. - + This will define certain methods and accessors, as well as make sure that the DataFrame adheres to some requirements. - Requires at least the following columns: + Requires at least the following columns: ['user_id', 'tracked_at'] Requires valid point geometries; the 'index' of the GeoDataFrame will be treated as unique identifier @@ -71,7 +71,7 @@ def center(self): def generate_staypoints(self, *args, **kwargs): """ Generate staypoints from this collection of positionfixes. - + See :func:`trackintel.preprocessing.positionfixes.generate_staypoints`. """ return ti.preprocessing.positionfixes.generate_staypoints(self._obj, *args, **kwargs) @@ -79,7 +79,7 @@ def generate_staypoints(self, *args, **kwargs): def generate_triplegs(self, stps_input=None, *args, **kwargs): """ Generate triplegs from this collection of positionfixes. - + See :func:`trackintel.preprocessing.positionfixes.generate_triplegs`. """ return ti.preprocessing.positionfixes.generate_triplegs(self._obj, stps_input, *args, **kwargs) @@ -87,7 +87,7 @@ def generate_triplegs(self, stps_input=None, *args, **kwargs): def plot(self, *args, **kwargs): """ Plot this collection of positionfixes. - + See :func:`trackintel.visualization.positionfixes.plot_positionfixes`. """ ti.visualization.positionfixes.plot_positionfixes(self._obj, *args, **kwargs) @@ -95,7 +95,7 @@ def plot(self, *args, **kwargs): def to_csv(self, filename, *args, **kwargs): """ Store this collection of trackpoints as a CSV file. - + See :func:`trackintel.io.file.write_positionfixes_csv`. """ ti.io.file.write_positionfixes_csv(self._obj, filename, *args, **kwargs) @@ -103,7 +103,7 @@ def to_csv(self, filename, *args, **kwargs): def to_postgis(self, conn_string, table_name, schema=None, sql_chunksize=None, if_exists="replace"): """ Store this collection of positionfixes to PostGIS. - + See :func:`trackintel.io.postgis.write_positionfixes_postgis`. """ ti.io.postgis.write_positionfixes_postgis(self._obj, conn_string, table_name, schema, sql_chunksize, if_exists) @@ -111,7 +111,7 @@ def to_postgis(self, conn_string, table_name, schema=None, sql_chunksize=None, i def calculate_distance_matrix(self, *args, **kwargs): """ Calculate pair-wise distance among positionfixes or to other positionfixes. - + See :func:'trackintel.geogr.distances.calculate_distance_matrix'. """ return ti.geogr.distances.calculate_distance_matrix(self._obj, *args, **kwargs) diff --git a/trackintel/model/staypoints.py b/trackintel/model/staypoints.py index 400d9f09..86522b60 100644 --- a/trackintel/model/staypoints.py +++ b/trackintel/model/staypoints.py @@ -6,11 +6,11 @@ @pd.api.extensions.register_dataframe_accessor("as_staypoints") class StaypointsAccessor(object): """A pandas accessor to treat (Geo)DataFrames as collections of `Staypoints`. - + This will define certain methods and accessors, as well as make sure that the DataFrame adheres to some requirements. 
- Requires at least the following columns: + Requires at least the following columns: ['user_id', 'started_at', 'finished_at'] Requires valid point geometries; the 'index' of the GeoDataFrame will be treated as unique identifier @@ -22,8 +22,8 @@ class StaypointsAccessor(object): Notes ----- - `Staypoints` are defined as locations where a person did not move for a while. - Under consideration of location uncertainty, this means that a person stays within + `Staypoints` are defined as locations where a person did not move for a while. + Under consideration of location uncertainty, this means that a person stays within a certain radius for a certain amount of time. The exact definition is use-case dependent. @@ -74,7 +74,7 @@ def center(self): def generate_locations(self, *args, **kwargs): """ Generate locations from this collection of staypoints. - + See :func:`trackintel.preprocessing.staypoints.generate_locations`. """ return ti.preprocessing.staypoints.generate_locations(self._obj, *args, **kwargs) @@ -82,7 +82,7 @@ def generate_locations(self, *args, **kwargs): def create_activity_flag(self, *args, **kwargs): """ Set a flag if a staypoint is also an activity. - + See :func:`trackintel.preprocessing.staypoints.create_activity_flag`. """ return ti.preprocessing.staypoints.create_activity_flag(self._obj, *args, **kwargs) @@ -90,7 +90,7 @@ def create_activity_flag(self, *args, **kwargs): def spatial_filter(self, *args, **kwargs): """ Filter staypoints with a geo extent. - + See :func:`trackintel.preprocessing.filter.spatial_filter`. """ return ti.preprocessing.filter.spatial_filter(self._obj, *args, **kwargs) @@ -98,7 +98,7 @@ def spatial_filter(self, *args, **kwargs): def plot(self, *args, **kwargs): """ Plot this collection of staypoints. - + See :func:`trackintel.visualization.staypoints.plot_staypoints`. """ ti.visualization.staypoints.plot_staypoints(self._obj, *args, **kwargs) @@ -106,7 +106,7 @@ def plot(self, *args, **kwargs): def to_csv(self, filename, *args, **kwargs): """ Store this collection of staypoints as a CSV file. - + See :func:`trackintel.io.file.write_staypoints_csv`. """ ti.io.file.write_staypoints_csv(self._obj, filename, *args, **kwargs) @@ -114,7 +114,7 @@ def to_csv(self, filename, *args, **kwargs): def to_postgis(self, conn_string, table_name): """ Store this collection of staypoints to PostGIS. - + See :func:`trackintel.io.postgis.write_staypoints_postgis`. """ ti.io.postgis.write_staypoints_postgis(self._obj, conn_string, table_name) diff --git a/trackintel/model/tours.py b/trackintel/model/tours.py index 19395ee5..010783e8 100644 --- a/trackintel/model/tours.py +++ b/trackintel/model/tours.py @@ -5,7 +5,7 @@ class ToursAccessor(object): """A pandas accessor to treat DataFrames as collections of `Tours`. - Requires at least the following columns: + Requires at least the following columns: ['user_id', 'started_at', 'finished_at', 'origin_staypoint_id', 'journey'] The 'index' of the GeoDataFrame will be treated as unique identifier of the `Tours` @@ -51,7 +51,7 @@ def _validate(obj): def to_csv(self, filename, *args, **kwargs): """ Store this collection of tours as a CSV file. - + See :func:`trackintel.io.file.write_tours_csv`. """ raise NotImplementedError @@ -59,7 +59,7 @@ def to_csv(self, filename, *args, **kwargs): def plot(self, *args, **kwargs): """ Plot this collection of tours. - + See :func:`trackintel.visualization.tours.plot_tours`.
""" raise NotImplementedError diff --git a/trackintel/model/triplegs.py b/trackintel/model/triplegs.py index 3d4f6975..a7ebd68d 100644 --- a/trackintel/model/triplegs.py +++ b/trackintel/model/triplegs.py @@ -8,11 +8,11 @@ @pd.api.extensions.register_dataframe_accessor("as_triplegs") class TriplegsAccessor(object): """A pandas accessor to treat (Geo)DataFrames as collections of `Tripleg`. - + This will define certain methods and accessors, as well as make sure that the DataFrame adheres to some requirements. - Requires at least the following columns: + Requires at least the following columns: ['user_id', 'started_at', 'finished_at'] Requires valid line geometries; the 'index' of the GeoDataFrame will be treated as unique identifier @@ -66,7 +66,7 @@ def _validate(obj): def plot(self, *args, **kwargs): """ Plot this collection of triplegs. - + See :func:`trackintel.visualization.triplegs.plot_triplegs`. """ ti.visualization.triplegs.plot_triplegs(self._obj, *args, **kwargs) @@ -74,7 +74,7 @@ def plot(self, *args, **kwargs): def to_csv(self, filename, *args, **kwargs): """ Store this collection of triplegs as a CSV file. - + See :func:`trackintel.io.file.write_triplegs_csv`. """ ti.io.file.write_triplegs_csv(self._obj, filename, *args, **kwargs) @@ -82,7 +82,7 @@ def to_csv(self, filename, *args, **kwargs): def to_postgis(self, conn_string, table_name): """ Store this collection of triplegs to PostGIS. - + See :func:`trackintel.io.postgis.store_positionfixes_postgis`. """ ti.io.postgis.write_triplegs_postgis(self._obj, conn_string, table_name) @@ -90,7 +90,7 @@ def to_postgis(self, conn_string, table_name): def calculate_distance_matrix(self, *args, **kwargs): """ Calculate pair-wise distance among triplegs or to other triplegs. - + See :func:`trackintel.geogr.distances.calculate_distance_matrix`. """ return ti.geogr.distances.calculate_distance_matrix(self._obj, *args, **kwargs) @@ -98,7 +98,7 @@ def calculate_distance_matrix(self, *args, **kwargs): def spatial_filter(self, *args, **kwargs): """ Filter triplegs with a geo extent. - + See :func:`trackintel.preprocessing.filter.spatial_filter`. """ return ti.preprocessing.filter.spatial_filter(self._obj, *args, **kwargs) @@ -106,7 +106,7 @@ def spatial_filter(self, *args, **kwargs): def predict_transport_mode(self, *args, **kwargs): """ Predict/impute the transport mode with which each tripleg was likely covered. - + See :func:`trackintel.analysis.transport_mode_identification.predict_transport_mode`. """ return ti.analysis.transport_mode_identification.predict_transport_mode(self._obj, *args, **kwargs) @@ -114,7 +114,7 @@ def predict_transport_mode(self, *args, **kwargs): def calculate_modal_split(self, *args, **kwargs): """ Calculate the modal split of the triplegs. - + See :func:`trackintel.analysis.modal_split.calculate_modal_split`. """ return ti.analysis.modal_split.calculate_modal_split(self._obj, *args, **kwargs) diff --git a/trackintel/model/trips.py b/trackintel/model/trips.py index c6490906..7f67cebd 100644 --- a/trackintel/model/trips.py +++ b/trackintel/model/trips.py @@ -63,8 +63,8 @@ def _validate(obj): def plot(self, *args, **kwargs): """ - Plot this collection of trips. - + Plot this collection of trips. + See :func:`trackintel.visualization.trips.plot_trips`. """ raise NotImplementedError @@ -72,7 +72,7 @@ def plot(self, *args, **kwargs): def to_csv(self, filename, *args, **kwargs): """ Store this collection of trips as a CSV file. - + See :func:`trackintel.io.file.write_trips_csv`. 
""" ti.io.file.write_trips_csv(self._obj, filename, *args, **kwargs) @@ -80,7 +80,7 @@ def to_csv(self, filename, *args, **kwargs): def to_postgis(self, conn_string, table_name, schema=None, sql_chunksize=None, if_exists="replace"): """ Store this collection of trips to PostGIS. - + See :func:`trackintel.io.postgis.write_trips_postgis`. """ ti.io.postgis.write_trips_postgis(self._obj, conn_string, table_name, schema, sql_chunksize, if_exists) diff --git a/trackintel/model/users.py b/trackintel/model/users.py index b72d3278..01f391c4 100644 --- a/trackintel/model/users.py +++ b/trackintel/model/users.py @@ -5,11 +5,11 @@ @pd.api.extensions.register_dataframe_accessor("as_users") class UsersAccessor(object): """A pandas accessor to treat DataFrames as collections of users. - + This will define certain methods and accessors, as well as make sure that the DataFrame adheres to some requirements. - Requires at least the following columns: + Requires at least the following columns: ['id'] For several usecases, the following additional columns are required: @@ -38,7 +38,7 @@ def _validate(obj): def plot_home_and_work(self): """ Plot home and work locations of users. - + See :func:`trackintel.visualization.users.plot_home_and_work`. """ raise NotImplementedError diff --git a/trackintel/preprocessing/positionfixes.py b/trackintel/preprocessing/positionfixes.py index 3ab66cfd..650c5a34 100644 --- a/trackintel/preprocessing/positionfixes.py +++ b/trackintel/preprocessing/positionfixes.py @@ -22,7 +22,7 @@ def generate_staypoints( ): """ Generate staypoints from positionfixes. - + Parameters ---------- pfs_input : GeoDataFrame (as trackintel positionfixes) @@ -30,56 +30,56 @@ def generate_staypoints( method : {'sliding'} Method to create staypoints. 'sliding' applies a sliding window over the data. - + distance_metric : {'haversine'} The distance metric used by the applied method. - + dist_threshold : float, default 100 The distance threshold for the 'sliding' method, i.e., how far someone has to travel to generate a new staypoint. Units depend on the dist_func parameter. time_threshold : float, default 5.0 (minutes) The time threshold for the 'sliding' method in minutes. - + gap_threshold : float, default 1e6 (minutes) - The time threshold of determine whether a gap exists between consecutive pfs. Staypoints + The time threshold of determine whether a gap exists between consecutive pfs. Staypoints will not be generated between gaps. Only valid in 'sliding' method. - + include_last: boolen, default False The original algorithm (see Li et al. (2008)) only detects staypoint if the user steps out of that staypoint. This will omit the last staypoint (if any). Set 'include_last' to True to include this last staypoint. - + print_progress: boolen, default False Show per-user progress if set to True. - + Returns ------- pfs: GeoDataFrame (as trackintel positionfixes) The original positionfixes with a new column ``[`staypoint_id`]``. - + stps: GeoDataFrame (as trackintel staypoints) The generated staypoints. - + Notes ----- The 'sliding' method is adapted from Li et al. (2008). In the original algorithm, the 'finished_at' - time for the current staypoint lasts until the 'tracked_at' time of the first positionfix outside - this staypoint. This implies potential tracking gaps may be included in staypoints, and users + time for the current staypoint lasts until the 'tracked_at' time of the first positionfix outside + this staypoint. 
This implies potential tracking gaps may be included in staypoints, and users are assumed to be stationary during this missing period. To avoid including too large gaps, set 'gap_threshold' parameter to a small value, e.g., 15 min. - + Examples -------- >>> pfs.as_positionfixes.generate_staypoints('sliding', dist_threshold=100) References ---------- - Zheng, Y. (2015). Trajectory data mining: an overview. ACM Transactions on Intelligent Systems + Zheng, Y. (2015). Trajectory data mining: an overview. ACM Transactions on Intelligent Systems and Technology (TIST), 6(3), 29. - Li, Q., Zheng, Y., Xie, X., Chen, Y., Liu, W., & Ma, W. Y. (2008, November). Mining user - similarity based on location history. In Proceedings of the 16th ACM SIGSPATIAL international + Li, Q., Zheng, Y., Xie, X., Chen, Y., Liu, W., & Ma, W. Y. (2008, November). Mining user + similarity based on location history. In Proceedings of the 16th ACM SIGSPATIAL international conference on Advances in geographic information systems (p. 34). ACM. """ # copy the original pfs for adding 'staypoint_id' column @@ -171,7 +171,7 @@ def generate_triplegs(pfs_input, stps_input, method="between_staypoints", gap_th Parameters ---------- pfs_input : GeoDataFrame (as trackintel positionfixes) - The positionfixes have to follow the standard definition for positionfixes DataFrames. + The positionfixes have to follow the standard definition for positionfixes DataFrames. If 'staypoint_id' column is not found, stps_input needs to be given. stps_input : GeoDataFrame (as trackintel staypoints), optional @@ -179,31 +179,31 @@ def generate_triplegs(pfs_input, stps_input, method="between_staypoints", gap_th positionfixes need 'staypoint_id' associated with them. method: {'between_staypoints'} - Method to create triplegs. 'between_staypoints' method defines a tripleg as all positionfixes - between two staypoints (no overlap). This method requires either a column 'staypoint_id' on + Method to create triplegs. 'between_staypoints' method defines a tripleg as all positionfixes + between two staypoints (no overlap). This method requires either a column 'staypoint_id' on the positionfixes or passing staypoints as an input. - + gap_threshold: float, default 15 (minutes) - Maximum allowed temporal gap size in minutes. If tracking data is missing for more than + Maximum allowed temporal gap size in minutes. If tracking data is missing for more than `gap_threshold` minutes, a new tripleg will be generated. Returns ------- pfs: GeoDataFrame (as trackintel positionfixes) The original positionfixes with a new column ``[`tripleg_id`]``. - + tpls: GeoDataFrame (as trackintel triplegs) The generated triplegs. Notes ----- - Methods 'between_staypoints' requires either a column 'staypoint_id' on the - positionfixes or passing some staypoints that correspond to the positionfixes! + Methods 'between_staypoints' requires either a column 'staypoint_id' on the + positionfixes or passing some staypoints that correspond to the positionfixes! This means you usually should call ``generate_staypoints()`` first. - - The first positionfix after a staypoint is regarded as the first positionfix of the - generated tripleg. The generated tripleg will not have overlapping positionfix with - the existing staypoints. This means a small temporal gap in user's trace will occur + + The first positionfix after a staypoint is regarded as the first positionfix of the + generated tripleg. The generated tripleg will not have overlapping positionfix with + the existing staypoints. 
This means a small temporal gap in the user's trace will occur between the first positionfix of the staypoint and the last positionfix of the tripleg: pfs_stp_first['tracked_at'] - pfs_tpl_last['tracked_at']. diff --git a/trackintel/visualization/osm.py b/trackintel/visualization/osm.py index 5d0963f1..992402ff 100644 --- a/trackintel/visualization/osm.py +++ b/trackintel/visualization/osm.py @@ -12,13 +12,13 @@ def plot_osm_streets(north, south, east, west, ax): ---------- north : float The northernmost coordinate (to retrieve OSM data for). - + south : float The southernmost coordinate. east : float The easternmost coordinate. - + west : float The westernmost coordinate. """
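Read together, generate_staypoints and generate_triplegs form the core preprocessing chain whose docstrings this patch reformats. A minimal sketch of that chain, assuming a positionfixes CSV like the one used in the tests; the file name and parameter values are illustrative only:

import trackintel as ti

pfs = ti.read_positionfixes_csv("positionfixes.csv", sep=";", index_col="id", crs="EPSG:4326")

# Sliding-window staypoint detection after Li et al. (2008); a small
# gap_threshold keeps long tracking gaps out of the generated staypoints.
pfs, stps = pfs.as_positionfixes.generate_staypoints(
    method="sliding", dist_threshold=100, time_threshold=5.0, gap_threshold=15
)

# Triplegs are then cut between consecutive staypoints; a gap longer than
# gap_threshold minutes starts a new tripleg.
pfs, tpls = pfs.as_positionfixes.generate_triplegs(stps, gap_threshold=15)

Both calls return the positionfixes with the new id column ('staypoint_id' or 'tripleg_id') alongside the generated GeoDataFrame, which is why the sketch reassigns pfs at each step.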