From 31cfbf9a791ad9d679c7eea1b4dd426d4db585b2 Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Thu, 14 Dec 2023 11:57:39 +0000 Subject: [PATCH 1/2] add tests for coord saving (downloader) --- tests/test_sheet_downloader.py | 214 +++++++++++++++++++++++---------- 1 file changed, 153 insertions(+), 61 deletions(-) diff --git a/tests/test_sheet_downloader.py b/tests/test_sheet_downloader.py index c24c8030..21c95538 100644 --- a/tests/test_sheet_downloader.py +++ b/tests/test_sheet_downloader.py @@ -3,6 +3,7 @@ import os from pathlib import Path +import pandas as pd import pytest from shapely.geometry import LineString, MultiPolygon, Polygon @@ -286,11 +287,55 @@ def test_download_all(sheet_downloader, tmp_path): sd.download_all_map_sheets(maps_path, metadata_fname) assert os.path.exists(f"{maps_path}/map_102352861.png") assert os.path.exists(f"{maps_path}/{metadata_fname}") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 5 - assert csv[0] == ",name,url,coordinates,crs,published_date,grid_bb\n" - assert csv[3].startswith("2,map_102352861.png") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 4 + assert list(df.columns) == [ + "name", + "url", + "coordinates", + "crs", + "published_date", + "grid_bb", + ] + assert list(df["name"]) == [ + "map_101602026.png", + "map_101602038.png", + "map_102352861.png", + "map_91617032.png", + ] + # test coords + assert ( + df.loc[0, "coordinates"] + == "(-1.0546875, 53.33087298301705, -0.703125, 53.54030739150021)" + ) + + sd.get_grid_bb(14) + maps_path = tmp_path / "test_maps_14/" + metadata_fname = "test_metadata.csv" + sd.download_all_map_sheets(maps_path, metadata_fname) + assert os.path.exists(f"{maps_path}/map_102352861.png") + assert os.path.exists(f"{maps_path}/{metadata_fname}") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 4 + assert list(df.columns) == [ + "name", + "url", + "coordinates", + "crs", + "published_date", + "grid_bb", + ] + assert list(df["name"]) == [ + "map_101602026.png", + "map_101602038.png", + "map_102352861.png", + "map_91617032.png", + ] + # test coords + assert ( + df.loc[0, "coordinates"] + == "(-0.98876953125, 53.448806835427575, -0.90087890625, 53.48804553605621)" + ) def test_download_all_kwargs(sheet_downloader, tmp_path): @@ -304,12 +349,27 @@ def test_download_all_kwargs(sheet_downloader, tmp_path): } sd.download_all_map_sheets(maps_path, metadata_fname, **kwargs) assert os.path.exists(f"{maps_path}/{metadata_fname}") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 5 - assert csv[0] == ",name,url,coordinates,crs,published_date,grid_bb,test1,test2\n" - assert "Six_Inch_GB_WFS.132" in csv[4] - assert "2021" in csv[4] + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 4 + assert list(df.columns) == [ + "name", + "url", + "coordinates", + "crs", + "published_date", + "grid_bb", + "test1", + "test2", + ] + assert list(df["name"]) == [ + "map_101602026.png", + "map_101602038.png", + "map_102352861.png", + "map_91617032.png", + ] + assert df.loc[3, "published_date"] == 2021 + assert df.loc[3, "test1"] == "test" + assert df.loc[3, "test2"] == "Six_Inch_GB_WFS.132" def test_download_by_wfs_ids(sheet_downloader, tmp_path): @@ -322,27 +382,32 @@ def test_download_by_wfs_ids(sheet_downloader, tmp_path): ) # test single wfs_id assert os.path.exists(f"{maps_path}/map_101602026.png") assert os.path.exists(f"{maps_path}/{metadata_fname}") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 2 - assert csv[0] == ",name,url,coordinates,crs,published_date,grid_bb\n" - assert csv[1].startswith("0,map_101602026.png") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 1 + assert list(df.columns) == [ + "name", + "url", + "coordinates", + "crs", + "published_date", + "grid_bb", + ] + assert df.loc[0, "name"] == "map_101602026.png" sd.download_map_sheets_by_wfs_ids( [16320, 16321], maps_path, metadata_fname ) # test list of wfs_ids assert os.path.exists(f"{maps_path}/map_101602038.png") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 3 # should have only downloaded/added one extra map - + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 2 # should have only downloaded/added one extra map + assert df.loc[1, "name"] == "map_101602038.png" sd.download_map_sheets_by_wfs_ids( 16320, maps_path, metadata_fname, overwrite=True ) # test overwrite - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert csv[0] == ",name,url,coordinates,crs,published_date,grid_bb\n" - assert csv[1].startswith("0,map_101602026.png") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 2 + assert df.loc[0, "name"] == "map_101602026.png" + assert df.loc[1, "name"] == "map_101602038.png" def test_download_by_wfs_ids_errors(sheet_downloader, tmp_path): @@ -378,19 +443,25 @@ def test_download_by_polygon(sheet_downloader, tmp_path): ) # test mode = 'within' assert os.path.exists(f"{maps_path}/map_101602026.png") assert os.path.exists(f"{maps_path}/{metadata_fname}") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 2 - assert csv[0] == ",name,url,coordinates,crs,published_date,grid_bb\n" - assert csv[1].startswith("0,map_101602026.png") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 1 + assert list(df.columns) == [ + "name", + "url", + "coordinates", + "crs", + "published_date", + "grid_bb", + ] + assert df.loc[0, "name"] == "map_101602026.png" sd.download_map_sheets_by_polygon( polygon, maps_path, metadata_fname, mode="intersects" ) # test mode = 'intersects', now 2 maps assert os.path.exists(f"{maps_path}/map_101602038.png") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 3 # should have only downloaded/added one extra map + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 2 # should have only downloaded/added one extra map + assert df.loc[1, "name"] == "map_101602038.png" def test_download_by_polygon_errors(sheet_downloader, tmp_path): @@ -417,11 +488,17 @@ def test_download_by_coords(sheet_downloader, tmp_path): sd.download_map_sheets_by_coordinates((-0.99, 53.43), maps_path, metadata_fname) assert os.path.exists(f"{maps_path}/map_101602038.png") assert os.path.exists(f"{maps_path}/{metadata_fname}") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 2 - assert csv[0] == ",name,url,coordinates,crs,published_date,grid_bb\n" - assert csv[1].startswith("0,map_101602038.png") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 1 + assert list(df.columns) == [ + "name", + "url", + "coordinates", + "crs", + "published_date", + "grid_bb", + ] + assert df.loc[0, "name"] == "map_101602038.png" def test_download_by_coords_errors(sheet_downloader, tmp_path): @@ -442,11 +519,17 @@ def test_download_by_line(sheet_downloader, tmp_path): sd.download_map_sheets_by_line(line, maps_path, metadata_fname) assert os.path.exists(f"{maps_path}/map_101602026.png") assert os.path.exists(f"{maps_path}/{metadata_fname}") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 3 - assert csv[0] == ",name,url,coordinates,crs,published_date,grid_bb\n" - assert csv[1].startswith("0,map_101602026.png") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 2 + assert list(df.columns) == [ + "name", + "url", + "coordinates", + "crs", + "published_date", + "grid_bb", + ] + assert list(df["name"]) == ["map_101602026.png", "map_101602038.png"] def test_download_by_line_errors(sheet_downloader, tmp_path): @@ -469,30 +552,34 @@ def test_download_by_string(sheet_downloader, tmp_path): ) # test w/ keys list assert os.path.exists(f"{maps_path}/map_91617032.png") assert os.path.exists(f"{maps_path}/{metadata_fname}") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 2 - assert csv[0] == ",name,url,coordinates,crs,published_date,grid_bb\n" - assert csv[1].startswith("0,map_91617032.png") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 1 + assert list(df.columns) == [ + "name", + "url", + "coordinates", + "crs", + "published_date", + "grid_bb", + ] + assert df.loc[0, "name"] == "map_91617032.png" sd.download_map_sheets_by_string( "Six_Inch_GB_WFS.16320", "id", maps_path, metadata_fname ) # test append + w/ keys as string assert os.path.exists(f"{maps_path}/map_101602026.png") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 3 - assert csv[2].startswith("1,map_101602026.png") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 2 + assert df.loc[1, "name"] == "map_101602026.png" sd.download_map_sheets_by_string( "III.SW", path_save=maps_path, metadata_fname=metadata_fname ) # test w/ no keys assert os.path.exists(f"{maps_path}/map_101602038.png") assert os.path.exists(f"{maps_path}/{metadata_fname}") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 4 - assert csv[3].startswith("2,map_101602038.png") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 3 + assert df.loc[2, "name"] == "map_101602038.png" def test_download_by_string_value_errors(sheet_downloader, tmp_path): @@ -531,12 +618,17 @@ def test_download_by_queries(sheet_downloader, tmp_path): assert os.path.exists(f"{maps_path}/map_101602026.png") assert os.path.exists(f"{maps_path}/map_91617032.png") assert os.path.exists(f"{maps_path}/{metadata_fname}") - with open(f"{maps_path}/{metadata_fname}") as f: - csv = f.readlines() - assert len(csv) == 3 - assert csv[0] == ",name,url,coordinates,crs,published_date,grid_bb\n" - assert csv[1].startswith("0,map_101602026.png,") - assert csv[2].startswith("1,map_91617032.png,") + df = pd.read_csv(f"{maps_path}/{metadata_fname}", sep=",", index_col=0) + assert len(df) == 2 + assert list(df.columns) == [ + "name", + "url", + "coordinates", + "crs", + "published_date", + "grid_bb", + ] + assert list(df["name"]) == ["map_101602026.png", "map_91617032.png"] def test_download_by_queries_errors(sheet_downloader, tmp_path): From 02c2984b4e5d7583488949bc719befd843538980 Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Thu, 14 Dec 2023 12:06:26 +0000 Subject: [PATCH 2/2] add approx for coords --- tests/test_sheet_downloader.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test_sheet_downloader.py b/tests/test_sheet_downloader.py index 21c95538..9db67d27 100644 --- a/tests/test_sheet_downloader.py +++ b/tests/test_sheet_downloader.py @@ -1,10 +1,12 @@ from __future__ import annotations import os +from ast import literal_eval from pathlib import Path import pandas as pd import pytest +from pytest import approx from shapely.geometry import LineString, MultiPolygon, Polygon from mapreader import SheetDownloader @@ -304,9 +306,8 @@ def test_download_all(sheet_downloader, tmp_path): "map_91617032.png", ] # test coords - assert ( - df.loc[0, "coordinates"] - == "(-1.0546875, 53.33087298301705, -0.703125, 53.54030739150021)" + assert literal_eval(df.loc[0, "coordinates"]) == approx( + (-1.0546875, 53.33087298301705, -0.703125, 53.54030739150021), rel=1e-6 ) sd.get_grid_bb(14) @@ -332,9 +333,9 @@ def test_download_all(sheet_downloader, tmp_path): "map_91617032.png", ] # test coords - assert ( - df.loc[0, "coordinates"] - == "(-0.98876953125, 53.448806835427575, -0.90087890625, 53.48804553605621)" + assert literal_eval(df.loc[0, "coordinates"]) == approx( + (-0.98876953125, 53.448806835427575, -0.90087890625, 53.48804553605621), + rel=1e-6, )