From d364198301ac4114b9fa8fe6f0330811afac311b Mon Sep 17 00:00:00 2001 From: jlarsen Date: Thu, 27 Jun 2024 16:16:43 -0700 Subject: [PATCH 1/3] update(nldi.py): Add CRS information--"EPSG:4326"--to GeoDataFrame objects --- dataretrieval/nldi.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dataretrieval/nldi.py b/dataretrieval/nldi.py index 4c850a2..30a49c8 100644 --- a/dataretrieval/nldi.py +++ b/dataretrieval/nldi.py @@ -10,6 +10,7 @@ NLDI_API_BASE_URL = 'https://labs.waterdata.usgs.gov/api/nldi/linked-data' _AVAILABLE_DATA_SOURCES = None +_CRS = "EPSG:4326" def _query_nldi(url, query_params, error_message): @@ -101,7 +102,7 @@ def get_flowlines( feature_collection = _query_nldi(url, query_params, err_msg) if as_json: return feature_collection - gdf = gpd.GeoDataFrame.from_features(feature_collection) + gdf = gpd.GeoDataFrame.from_features(feature_collection, crs=_CRS) return gdf @@ -154,7 +155,7 @@ def get_basin( feature_collection = _query_nldi(url, query_params, err_msg) if as_json: return feature_collection - gdf = gpd.GeoDataFrame.from_features(feature_collection) + gdf = gpd.GeoDataFrame.from_features(feature_collection, crs=_CRS) return gdf @@ -291,7 +292,7 @@ def get_features( feature_collection = _query_nldi(url, query_params, err_msg) if as_json: return feature_collection - gdf = gpd.GeoDataFrame.from_features(feature_collection) + gdf = gpd.GeoDataFrame.from_features(feature_collection, crs=_CRS) return gdf @@ -322,7 +323,7 @@ def get_features_by_data_source(data_source: str) -> gpd.GeoDataFrame: url = f'{NLDI_API_BASE_URL}/{data_source}' err_msg = f"Error getting features for data source '{data_source}'" feature_collection = _query_nldi(url, {}, err_msg) - gdf = gpd.GeoDataFrame.from_features(feature_collection) + gdf = gpd.GeoDataFrame.from_features(feature_collection, crs=_CRS) return gdf From 63dfffc02a89f55471f9164aad442cac626f98bd Mon Sep 17 00:00:00 2001 From: jlarsen Date: Tue, 16 Jul 2024 11:24:29 -0700 Subject: 
[PATCH 2/3] feat(geopandas support): return GeoDataFrame if geopandas is installed. --- dataretrieval/nwis.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py index 1c72714..85cc5e3 100644 --- a/dataretrieval/nwis.py +++ b/dataretrieval/nwis.py @@ -23,6 +23,11 @@ from .utils import query +try: + import geopandas as gpd +except ImportError: + gpd = None + WATERDATA_BASE_URL = 'https://nwis.waterdata.usgs.gov/' WATERDATA_URL = WATERDATA_BASE_URL + 'nwis/' WATERSERVICE_URL = 'https://waterservices.usgs.gov/nwis/' @@ -38,6 +43,7 @@ 'water_use', 'ratings', ] +_CRS = "EPSG:4326" def format_response( @@ -71,6 +77,14 @@ def format_response( if service == 'peaks': df = preformat_peaks_response(df) + if gpd is not None: + if "dec_lat_va" in list(df): + geoms = gpd.points_from_xy( + df.dec_long_va.values, + df.dec_lat_va.values + ) + df = gpd.GeoDataFrame(df, geometry=geoms, crs=_CRS) + # check for multiple sites: if 'datetime' not in df.columns: # XXX: consider making site_no index From f297b2d1a0b6f2872f06bef154e124b5b245b4d7 Mon Sep 17 00:00:00 2001 From: jlarsen Date: Tue, 16 Jul 2024 14:46:34 -0700 Subject: [PATCH 3/3] Update tests for geopandas updates --- tests/waterservices_test.py | 123 ++++++++++++++++++++++++++++-------- 1 file changed, 98 insertions(+), 25 deletions(-) diff --git a/tests/waterservices_test.py b/tests/waterservices_test.py index 19cc716..a04e09d 100755 --- a/tests/waterservices_test.py +++ b/tests/waterservices_test.py @@ -22,6 +22,10 @@ ) from dataretrieval.utils import NoSitesError +try: + import geopandas as gpd +except ImportError: + gpd = None def test_query_waterdata_validation(): """Tests the validation parameters of the query_waterservices method""" @@ -80,7 +84,10 @@ def test_get_dv(requests_mock): response_file_path = 'data/waterservices_dv.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_dv(sites=["01491000", "01645000"], start='2020-02-14', 
end='2020-02-15') - assert type(df) is DataFrame + + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + assert df.size == 8 assert_metadata(requests_mock, request_url, md, site, None, format) @@ -99,7 +106,9 @@ def test_get_dv_site_value_types(requests_mock, site_input_type_list): else: sites = site df, md = get_dv(sites=sites, start='2020-02-14', end='2020-02-15') - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + assert df.size == 8 @@ -112,7 +121,9 @@ def test_get_iv(requests_mock): response_file_path = 'data/waterservices_iv.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_iv(sites=["01491000", "01645000"], start='2019-02-14', end='2020-02-15') - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + assert df.size == 563380 assert md.url == request_url assert_metadata(requests_mock, request_url, md, site, None, format) @@ -132,7 +143,8 @@ def test_get_iv_site_value_types(requests_mock, site_input_type_list): else: sites = site df, md = get_iv(sites=sites, start='2019-02-14', end='2020-02-15') - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 563380 assert md.url == request_url @@ -142,6 +154,7 @@ def test_get_info(requests_mock): Tests get_info method correctly generates the request url and returns the result in a DataFrame. 
Note that only sites and format are passed as query params """ + size = 24 format = "rdb" site = '01491000%2C01645000' parameter_cd = "00618" @@ -149,8 +162,18 @@ def test_get_info(requests_mock): response_file_path = 'data/waterservices_site.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_info(sites=["01491000", "01645000"], parameterCd="00618") - assert type(df) is DataFrame - assert df.size == 24 + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + + if "geometry" in list(df): + geom_type = df.geom_type.unique() + if len(geom_type) > 1 or geom_type[0] != "Point": + raise AssertionError( + f"Geometry type {geom_type} not valid, expecting Point" + ) + size += len(df) + + assert df.size == size assert md.url == request_url assert_metadata(requests_mock, request_url, md, site, [parameter_cd], format) @@ -167,7 +190,19 @@ def test_get_qwdata(requests_mock): mock_request(requests_mock, request_url, response_file_path) with pytest.warns(DeprecationWarning): df, md = get_qwdata(sites=["01491000", "01645000"]) - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + + if "geometry" in list(df): + if not isinstance(df, gpd.GeoDataFrame): + raise AssertionError(f"{type(df)} is not a GeoDataFrame") + + geom_type = df.geom_type.unique() + if len(geom_type) > 1 or geom_type[0] != "Point": + raise AssertionError( + f"Geometry type {geom_type} not valid, expecting Point" + ) + assert df.size == 1821472 assert_metadata(requests_mock, request_url, md, site, None, format) @@ -202,7 +237,9 @@ def test_get_gwlevels(requests_mock): response_file_path = 'data/waterservices_gwlevels.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_gwlevels(sites=[site]) - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") 
+ assert df.size == 16 assert_metadata(requests_mock, request_url, md, site, None, format) @@ -221,7 +258,8 @@ def test_get_gwlevels_site_value_types(requests_mock, site_input_type_list): else: sites = site df, md = get_gwlevels(sites=sites) - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 16 @@ -234,7 +272,9 @@ def test_get_discharge_peaks(requests_mock): response_file_path = 'data/waterservices_peaks.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_discharge_peaks(sites=[site], start='2000-02-14', end='2020-02-15') - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + assert df.size == 240 assert_metadata(requests_mock, request_url, md, site, None, format) @@ -255,7 +295,9 @@ def test_get_discharge_peaks_sites_value_types(requests_mock, site_input_type_li sites = site df, md = get_discharge_peaks(sites=sites, start='2000-02-14', end='2020-02-15') - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + assert df.size == 240 @@ -269,7 +311,9 @@ def test_get_discharge_measurements(requests_mock): response_file_path = 'data/waterdata_measurements.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_discharge_measurements(sites=[site], start='2000-02-14', end='2020-02-15') - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + assert df.size == 2130 assert_metadata(requests_mock, request_url, md, site, None, format) @@ -288,7 +332,8 @@ def test_get_discharge_measurements_sites_value_types(requests_mock, site_input_ else: sites = site df, md = get_discharge_measurements(sites=sites, start='2000-02-14', end='2020-02-15') - assert type(df) is DataFrame + if not 
isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 2130 @@ -300,7 +345,8 @@ def test_get_pmcodes(requests_mock): response_file_path = 'data/waterdata_pmcodes.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_pmcodes(parameterCd='00618') - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 13 assert_metadata(requests_mock, request_url, md, None, None, format) @@ -319,7 +365,8 @@ def test_get_pmcodes_parameterCd_value_types(requests_mock, parameterCd_input_ty else: parameterCd = parameterCd df, md = get_pmcodes(parameterCd=parameterCd) - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 13 @@ -332,7 +379,8 @@ def test_get_water_use_national(requests_mock): response_file_path = 'data/water_use_national.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_water_use() - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 225 assert_metadata(requests_mock, request_url, md, None, None, format) @@ -369,7 +417,8 @@ def test_get_water_use_national_county_value_types(requests_mock, county_input_t else: counties = county df, md = get_water_use(counties=counties) - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 225 @@ -387,7 +436,8 @@ def test_get_water_use_national_county_value_types(requests_mock, category_input else: categories = category df, md = get_water_use(categories=categories) - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 225 @@ 
-400,7 +450,8 @@ def test_get_water_use_allegheny(requests_mock): response_file_path = 'data/water_use_allegheny.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_water_use(state="PA", counties="003") - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 1981 assert_metadata(requests_mock, request_url, md, None, None, format) @@ -421,13 +472,16 @@ def test_get_ratings(requests_mock): response_file_path = 'data/waterservices_ratings.txt' mock_request(requests_mock, request_url, response_file_path) df, md = get_ratings(site_no=site) - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + assert df.size == 33 assert_metadata(requests_mock, request_url, md, site, None, format) def test_what_sites(requests_mock): """Tests what_sites method correctly generates the request url and returns the result in a DataFrame""" + size = 2472 format = "rdb" parameter_cd = '00010%2C00060' parameter_cd_list = ["00010","00060"] @@ -437,8 +491,22 @@ def test_what_sites(requests_mock): mock_request(requests_mock, request_url, response_file_path) df, md = what_sites(bBox=[-83.0,36.5,-81.0,38.5], parameterCd=parameter_cd_list, hasDataTypeCd="dv") - assert type(df) is DataFrame - assert df.size == 2472 + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") + + if gpd is not None: + if not isinstance(df, gpd.GeoDataFrame): + raise AssertionError(f"{type(df)} is not a GeoDataFrame") + + geom_type = df.geom_type.unique() + if len(geom_type) > 1 or geom_type[0] != "Point": + raise AssertionError( + f"Geometry type {geom_type} not valid, expecting Point" + ) + + size += len(df) + + assert df.size == size assert_metadata(requests_mock, request_url, md, None, parameter_cd_list, format) @@ -450,7 +518,8 @@ def 
test_get_stats(requests_mock): mock_request(requests_mock, request_url, response_file_path) df, md = get_stats(sites=["01491000", "01645000"]) - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 51936 assert_metadata(requests_mock, request_url, md, None, None, format) @@ -468,7 +537,8 @@ def test_get_stats_site_value_types(requests_mock, site_input_type_list): else: sites = site df, md = get_stats(sites=sites) - assert type(df) is DataFrame + if not isinstance(df, DataFrame): + raise AssertionError(f"{type(df)} is not DataFrame base class type") assert df.size == 51936 @@ -486,7 +556,10 @@ def assert_metadata(requests_mock, request_url, md, site, parameter_cd, format): with open('data/waterservices_site.txt') as text: requests_mock.get(site_request_url, text=text.read()) site_info, _ = md.site_info - assert type(site_info) is DataFrame + if not isinstance(site_info, DataFrame): + raise AssertionError( + f"{type(site_info)} is not DataFrame base class type" + ) if parameter_cd is None: assert md.variable_info is None else: