From 3290625a6783a4e628494feb7a5e8d310b7797f6 Mon Sep 17 00:00:00 2001 From: remtav Date: Mon, 6 Jun 2022 16:46:37 -0400 Subject: [PATCH 1/8] add support for pathlib.Path object as input to all functions accepting paths as strings --- solaris/data/coco.py | 15 ++++++++------- solaris/eval/base.py | 15 ++++++++------- solaris/eval/pixel.py | 8 ++++---- solaris/eval/vector.py | 37 +++++++++++++++++++------------------ solaris/raster/image.py | 10 ++++++---- solaris/tile/raster_tile.py | 14 +++++++------- solaris/tile/vector_tile.py | 4 ++-- solaris/utils/core.py | 17 +++++++++-------- solaris/utils/data.py | 6 +++--- solaris/utils/geo.py | 17 +++++++++-------- solaris/utils/io.py | 2 +- solaris/vector/mask.py | 22 +++++++++++----------- solaris/vector/polygon.py | 23 ++++++++++++----------- 13 files changed, 99 insertions(+), 91 deletions(-) diff --git a/solaris/data/coco.py b/solaris/data/coco.py index 098663de..971e5c57 100644 --- a/solaris/data/coco.py +++ b/solaris/data/coco.py @@ -1,6 +1,7 @@ import json import logging import os +from pathlib import Path import geopandas as gpd import numpy as np @@ -47,7 +48,7 @@ def geojson2coco( Arguments --------- - image_src : :class:`str` or :class:`list` or :class:`dict` + image_src : :class:`str` or :class:`pathlib.Path` or :class:`list` or :class:`dict` Source image(s) to use in the dataset. This can be:: 1. a string path to an image, @@ -149,8 +150,8 @@ def geojson2coco( logger.setLevel(_get_logging_level(int(verbose))) logger.debug("Preparing image filename: image ID dict.") # pdb.set_trace() - if isinstance(image_src, str): - if image_src.endswith("json"): + if isinstance(image_src, (str, Path)): + if str(image_src).endswith("json"): logger.debug("COCO json provided. Extracting fname:id dict.") with open(image_src, "r") as f: image_ref = json.load(f) @@ -599,13 +600,13 @@ def _get_fname_list(p, recursive=False, extension=".tif"): """Get a list of filenames from p, which can be a dir, fname, or list.""" if isinstance(p, list): return p - elif isinstance(p, str): - if os.path.isdir(p): + elif isinstance(p, (str, Path)): + if Path(p).is_dir(): return get_files_recursively( p, traverse_subdirs=recursive, extension=extension ) - elif os.path.isfile(p): - return [p] + elif Path(p).is_file(): + return [str(p)] else: raise ValueError("If a string is provided, it must be a valid" " path.") else: diff --git a/solaris/eval/base.py b/solaris/eval/base.py index 53b08908..801bea90 100644 --- a/solaris/eval/base.py +++ b/solaris/eval/base.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import geopandas as gpd import pandas as pd @@ -29,7 +30,7 @@ class Evaluator: Arguments --------- - ground_truth_vector_file : str + ground_truth_vector_file : `str` or :class:`pathlib.Path` Path to .geojson file for ground truth. 
""" @@ -37,11 +38,11 @@ class Evaluator: def __init__(self, ground_truth_vector_file): # Load Ground Truth : Ground Truth should be in geojson or shape file try: - if ground_truth_vector_file.lower().endswith("json"): + if str(ground_truth_vector_file).lower().endswith("json"): self.load_truth(ground_truth_vector_file) - elif ground_truth_vector_file.lower().endswith("csv"): + elif str(ground_truth_vector_file).lower().endswith("csv"): self.load_truth(ground_truth_vector_file, truthCSV=True) - self.ground_truth_fname = ground_truth_vector_file + self.ground_truth_fname = str(ground_truth_vector_file) except AttributeError: # handles passing gdf instead of path to file self.ground_truth_GDF = ground_truth_vector_file self.ground_truth_fname = "GeoDataFrame variable" @@ -509,7 +510,7 @@ def load_proposal( Arguments --------- - proposal_vector_file : str + proposal_vector_file : `str` or :class:`pathlib.Path` Path to the file containing proposal vector objects. This can be a .geojson or a .csv. conf_field_list : list, optional @@ -540,7 +541,7 @@ def load_proposal( """ # Load Proposal if proposal_vector_file is a path to a file - if os.path.isfile(proposal_vector_file): + if Path(proposal_vector_file).is_file(): # if it's a CSV format, first read into a pd df and then convert # to gpd gdf by loading in geometries using shapely if proposalCSV: @@ -588,7 +589,7 @@ def load_truth( Arguments --------- - ground_truth_vector_file : str + ground_truth_vector_file : `str` or :class:`pathlib.Path` Path to the ground truth vector file. Must be either .geojson or .csv format. truthCSV : bool, optional diff --git a/solaris/eval/pixel.py b/solaris/eval/pixel.py index 36a74a08..44261fd5 100644 --- a/solaris/eval/pixel.py +++ b/solaris/eval/pixel.py @@ -84,13 +84,13 @@ def f1( ``1``, values < `prop_threshold` will be set to ``0``. show_plot : bool, optional Switch to plot the outputs. Defaults to ``False``. - im_file : str, optional + im_file : `str` or :class:`pathlib.Path`, optional Image file corresponding to the masks. Ignored if ``show_plot == False``. Defaults to ``''``. show_colorbar : bool, optional Switch to show colorbar. Ignored if ``show_plot == False``. Defaults to ``False``. - plot_file : str, optional + plot_file : `str` or :class:`pathlib.Path`, optional Output file if plotting. Ignored if ``show_plot == False``. Defaults to ``''``. 
dpi : int, optional @@ -167,7 +167,7 @@ def f1( plt.suptitle(title, fontsize=fontsize) # ground truth - if len(im_file) > 0: + if len(str(im_file)) > 0: # raw image ax1.imshow(cv2.imread(im_file, 1)) # ground truth @@ -211,7 +211,7 @@ def f1( # fig.tight_layout(rect=[0, 0.03, 1, 0.95]) plt.subplots_adjust(top=0.8) - if len(plot_file) > 0: + if len(str(plot_file)) > 0: plt.savefig(plot_file, dpi=dpi) print("Time to create and save F1 plots:", time.time() - t0, "seconds") diff --git a/solaris/eval/vector.py b/solaris/eval/vector.py index da7f9b17..8041951c 100644 --- a/solaris/eval/vector.py +++ b/solaris/eval/vector.py @@ -1,5 +1,6 @@ import glob import os +from pathlib import Path import geopandas as gpd import numpy as np @@ -49,9 +50,9 @@ def get_all_objects( unique classes present in each Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies @@ -71,23 +72,23 @@ def get_all_objects( A union of the prop_objs and gt_objs lists """ objs = [] - os.chdir(proposal_polygons_dir) + os.chdir(str(proposal_polygons_dir)) search = "*" + file_format proposal_geojsons = glob.glob(search) for geojson in tqdm(proposal_geojsons): - ground_truth_poly = os.path.join(gt_polygons_dir, geojson) + ground_truth_poly = Path(gt_polygons_dir) / geojson if os.path.exists(ground_truth_poly): ground_truth_gdf = gpd.read_file(ground_truth_poly) proposal_gdf = gpd.read_file(geojson) for index, row in proposal_gdf.iterrows(): objs.append(row[prediction_cat_attrib]) prop_objs = list(set(objs)) - os.chdir(gt_polygons_dir) + os.chdir(str(gt_polygons_dir)) search = "*" + file_format objs = [] gt_geojsons = glob.glob(search) for geojson in tqdm(gt_geojsons): - proposal_poly = os.path.join(proposal_polygons_dir, geojson) + proposal_poly = Path(proposal_polygons_dir) / geojson if os.path.exists(proposal_poly): proposal_gdf = gpd.read_file(proposal_poly) ground_truth_gdf = gpd.read_file(geojson) @@ -114,9 +115,9 @@ def precision_calc( calculate metric for classes that exist in the ground truth. Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies @@ -148,7 +149,7 @@ def precision_calc( All confidences for each object for each class """ ious = [] - os.chdir(proposal_polygons_dir) + os.chdir(str(proposal_polygons_dir)) search = "*" + file_format proposal_geojsons = glob.glob(search) iou_holder = [] @@ -166,7 +167,7 @@ def precision_calc( confidences.append([]) for geojson in tqdm(proposal_geojsons): - ground_truth_poly = os.path.join(gt_polygons_dir, geojson) + ground_truth_poly = Path(gt_polygons_dir) / geojson if os.path.exists(ground_truth_poly): ground_truth_gdf = gpd.read_file(ground_truth_poly) proposal_gdf = gpd.read_file(geojson) @@ -241,9 +242,9 @@ def recall_calc( calculate metric for classes that exist in the ground truth. 
Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies @@ -270,7 +271,7 @@ def recall_calc( The mean recall score of recall_by_class """ ious = [] - os.chdir(gt_polygons_dir) + os.chdir(str(gt_polygons_dir)) search = "*" + file_format gt_geojsons = glob.glob(search) iou_holder = [] @@ -285,7 +286,7 @@ def recall_calc( for i in range(len(object_subset)): iou_holder.append([]) for geojson in tqdm(gt_geojsons): - proposal_poly = os.path.join(proposal_polygons_dir, geojson) + proposal_poly = Path(proposal_polygons_dir) / geojson if os.path.exists(proposal_poly): proposal_gdf = gpd.read_file(proposal_poly) ground_truth_gdf = gpd.read_file(geojson) @@ -353,9 +354,9 @@ def mF1( only calculate metric for classes that exist in the ground truth. Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies @@ -480,9 +481,9 @@ def mAP_score( Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies diff --git a/solaris/raster/image.py b/solaris/raster/image.py index cc751dc3..2f9566b5 100644 --- a/solaris/raster/image.py +++ b/solaris/raster/image.py @@ -1,3 +1,5 @@ +from pathlib import Path + import numpy as np import rasterio @@ -9,7 +11,7 @@ def get_geo_transform(raster_src): Arguments --------- - raster_src : str, :class:`rasterio.DatasetReader`, or `osgeo.gdal.Dataset` + raster_src : str, :class:`pathlib.Path`, :class:`rasterio.DatasetReader`, or `osgeo.gdal.Dataset` Path to a raster image with georeferencing data to apply to `geom`. Alternatively, an opened :class:`rasterio.Band` object or :class:`osgeo.gdal.Dataset` object can be provided. Required if not @@ -21,7 +23,7 @@ def get_geo_transform(raster_src): An affine transformation object to the image's location in its CRS. """ - if isinstance(raster_src, str): + if isinstance(raster_src, (str, Path)): affine_obj = rasterio.open(raster_src).transform elif isinstance(raster_src, rasterio.DatasetReader): affine_obj = raster_src.transform @@ -175,7 +177,7 @@ def stitch_images( # --------- # array : :class:`numpy.ndarray` # A numpy array with a the shape: [Channels, X, Y] or [X, Y] -# out_name : str +# out_name : str or :class:`pathlib.Path` # The output name and path for your image # proj : :class:`gdal.projection` # A projection, can be extracted from an image opened with gdal with @@ -200,7 +202,7 @@ def stitch_images( # driver = gdal.GetDriverByName("GTiff") # if len(array.shape) == 2: # array = array[np.newaxis, ...] 
-# os.makedirs(os.path.dirname(os.path.abspath(out_name)), exist_ok=True) +# Path(out_name).resolve().parent.mkdir(parents=True, exist_ok=True) # dataset = driver.Create(out_name, array.shape[2], array.shape[1], array.shape[0], out_format) # if verbose is True: # print("Array Shape, should be [Channels, X, Y] or [X,Y]:", array.shape) diff --git a/solaris/tile/raster_tile.py b/solaris/tile/raster_tile.py index ea7a3dd2..a252ad91 100644 --- a/solaris/tile/raster_tile.py +++ b/solaris/tile/raster_tile.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import numpy as np import rasterio @@ -71,8 +72,8 @@ class RasterTiler(object): src_path : `str` The path or URL to the source dataset. Used for calling ``rio_cogeo.cogeo.cog_validate()``. - dest_dir : `str` - The directory to save the output tiles to. If not + dest_dir : `str` or :class:`pathlib.Path` + The directory to save the output tiles to. dest_crs : int The EPSG code for the output images. If not provided, outputs will keep the same CRS as the source image when ``Tiler.make_tile_images()`` @@ -129,8 +130,7 @@ def __init__( if verbose: print("Initializing Tiler...") self.dest_dir = dest_dir - if not os.path.exists(self.dest_dir): - os.makedirs(self.dest_dir) + Path(self.dest_dir).mkdir(parents=True, exist_ok=True) if dest_crs is not None: self.dest_crs = _check_crs(dest_crs) else: @@ -180,7 +180,7 @@ def tile( Arguments --------- - src : :class:`rasterio.io.DatasetReader` or str + src : :class:`rasterio.io.DatasetReader`, str or :class:`pathlib.Path` The source dataset to tile. nodata_threshold : float, optional Nodata percentages greater than this threshold will not be saved as tiles. @@ -297,13 +297,13 @@ def tile_generator( Arguments --------- - src : `str` or :class:`Rasterio.DatasetReader` + src : `str`, :class:`pathlib.Path` or :class:`Rasterio.DatasetReader` The source data to tile from. If this is a "classic" (non-cloud-optimized) GeoTIFF, the whole image will be loaded in; if it's cloud-optimized, only the required portions will be loaded during tiling unless ``force_load_cog=True`` was specified upon initialization. - dest_dir : str, optional + dest_dir : str or :class:`pathlib.Path`, optional The path to the destination directory to output images to. If the path doesn't exist, it will be created. This argument is required if it wasn't provided during initialization. diff --git a/solaris/tile/vector_tile.py b/solaris/tile/vector_tile.py index 8c386b62..db8da8a9 100644 --- a/solaris/tile/vector_tile.py +++ b/solaris/tile/vector_tile.py @@ -59,7 +59,7 @@ def tile( Arguments --------- - src : `str` or :class:`geopandas.GeoDataFrame` + src : `str`, :class:`pathlib.Path` or :class:`geopandas.GeoDataFrame` The source vector data to tile. Must either be a path to a GeoJSON or a :class:`geopandas.GeoDataFrame`. tile_bounds : list @@ -152,7 +152,7 @@ def tile_generator( Arguments --------- - src : `str` or :class:`geopandas.GeoDataFrame` + src : `str`, :class:`pathlib.Path` or :class:`geopandas.GeoDataFrame` The source vector data to tile. Must either be a path to a GeoJSON or a :class:`geopandas.GeoDataFrame`.
tile_bounds : list diff --git a/solaris/utils/core.py b/solaris/utils/core.py index bd46ccda..006733ff 100644 --- a/solaris/utils/core.py +++ b/solaris/utils/core.py @@ -1,5 +1,6 @@ import os from distutils.version import LooseVersion +from pathlib import Path from warnings import warn import geopandas as gpd @@ -17,7 +18,7 @@ def _check_rasterio_im_load(im): """Check if `im` is already loaded in; if not, load it in.""" - if isinstance(im, str): + if isinstance(im, (str, Path)): return rasterio.open(im) elif isinstance(im, rasterio.DatasetReader): return im @@ -27,7 +28,7 @@ def _check_skimage_im_load(im): """Check if `im` is already loaded in; if not, load it in.""" - if isinstance(im, str): + if isinstance(im, (str, Path)): return skimage.io.imread(im) elif isinstance(im, np.ndarray): return im @@ -39,8 +40,8 @@ def _check_df_load(df): """Check if `df` is already loaded in, if not, load from file.""" - if isinstance(df, str): - if df.lower().endswith("json"): + if isinstance(df, (str, Path)): + if str(df).lower().endswith("json"): return _check_gdf_load(df) else: return pd.read_csv(df) @@ -52,11 +53,11 @@ def _check_gdf_load(gdf): """Check if `gdf` is already loaded in, if not, load from geojson.""" - if isinstance(gdf, str): + if isinstance(gdf, (str, Path)): # as of geopandas 0.6.2, using the OGR CSV driver requires some add'nal # kwargs to create a valid geodataframe with a geometry column. see # https://github.com/geopandas/geopandas/issues/1234 - if gdf.lower().endswith("csv"): + if str(gdf).lower().endswith("csv"): return gpd.read_file( gdf, GEOM_POSSIBLE_NAMES="geometry", KEEP_GEOM_COLUMNS="NO" ) @@ -117,7 +118,7 @@ def get_data_paths(path, infer=False): Arguments --------- - path : str + path : str or :class:`pathlib.Path` Path to a .CSV-formatted reference file defining the location of training, validation, or inference data. See docs for details. infer : bool, optional @@ -142,7 +143,7 @@ def get_files_recursively(path, traverse_subdirs=False, extension=".tif"): """Get files from subdirs of `path`, joining them to the dir.""" if traverse_subdirs: - walker = os.walk(path) + walker = os.walk(str(path)) path_list = [] for step in walker: if not step[2]: # if there are no files in the current dir diff --git a/solaris/utils/data.py b/solaris/utils/data.py index 966cea90..bde24c62 100644 --- a/solaris/utils/data.py +++ b/solaris/utils/data.py @@ -33,19 +33,19 @@ def make_dataset_csv( Arguments --------- - im_dir : str + im_dir : str or :class:`pathlib.Path` The path to the directory containing images to be used by your model. Images in sub-directories can be included by setting ``recursive=True``. im_ext : str, optional The file extension used by your images. Defaults to ``"tif"``. Not case sensitive. - label_dir : str, optional + label_dir : str or :class:`pathlib.Path`, optional The path to the directory containing images to be used by your model. Images in sub-directories can be included by setting ``recursive=True``. This argument is required if `stage` is ``"train"`` (default) or ``"val"``, but has no effect if `stage` is ``"infer"``. - output_path : str, optional + output_path : str or :class:`pathlib.Path`, optional The path to save the generated CSV to. Defaults to ``"dataset.csv"``. stage : str, optional The stage that the csv is generated for.
Can be ``"train"`` (default), diff --git a/solaris/utils/geo.py b/solaris/utils/geo.py index 7167408f..4aa0e293 100644 --- a/solaris/utils/geo.py +++ b/solaris/utils/geo.py @@ -1,6 +1,7 @@ import json import os import sys +from pathlib import Path from warnings import warn import geopandas as gpd @@ -52,7 +53,7 @@ def reproject( Arguments --------- - input_object : `str` or :class:`rasterio.DatasetReader` or :class:`geopandas.GeoDataFrame` + input_object : `str`, :class:`pathlib.Path`, :class:`Rasterio.DatasetReader` or :class:`geopandas.GeoDataFrame` An object to transform to a new CRS. If a string, it must be a path to a georegistered image or vector dataset (e.g. a .GeoJSON). If the object itself does not contain georeferencing information, the @@ -73,7 +74,7 @@ def reproject( `target_crs` is provided, the input will be projected into the appropriate UTM zone. `target_crs` takes precedence if both it and `target_object` are provided. - dest_path : str, optional + dest_path : `str` or :class:`pathlib.Path`, optional The path to save the output to (if desired). resampling_method : str, optional The resampling method to use during reprojection of raster data. **Only @@ -261,11 +262,11 @@ def get_crs(obj): def _parse_geo_data(input): - if isinstance(input, str): - if input.lower().endswith("json") or input.lower().endswith("csv"): + if isinstance(input, (str, Path)): + if str(input).lower().endswith("json") or str(input).lower().endswith("csv"): input_type = "vector" input_data = _check_df_load(input) - elif input.lower().endswith("tif") or input.lower().endswith("tiff"): + elif str(input).lower().endswith("tif") or str(input).lower().endswith("tiff"): input_type = "raster" input_data = _check_rasterio_im_load(input) else: @@ -299,7 +300,7 @@ def reproject_geometry(input_geom, input_crs=None, target_crs=None, affine_obj=N The target coordinate reference system to re-project the geometry into. If not provided, the appropriate UTM zone will be selected by default, unless `affine_transform` is provided (and therefore CRSs are ignored.) - affine_transform : :class:`affine.Affine`, optional + affine_obj : :class:`affine.Affine`, optional An :class:`affine.Affine` object (or a ``[a, b, c, d, e, f]`` list to convert to that format) to use for transformation. Has no effect unless `input_crs` **and** `target_crs` are not provided. @@ -372,7 +373,7 @@ def raster_get_projection_unit(image): Arguments --------- - image : raster image, GeoTIFF or other format + image : `str`, :class:`pathlib.Path` or :class:`Rasterio.DatasetReader` A raster file with georeferencing Notes @@ -807,7 +808,7 @@ def split_geom( resolution: `tuple` of `float`s, optional (x resolution, y resolution). Used by default if use_metric_size is False. Can be acquired from rasterio dataset object's metadata. - src_img: `str` or `raster`, optional + src_img: `str`, :class:`pathlib.Path` or :class:`Rasterio.DatasetReader`, optional A rasterio raster object or path to a geotiff. The bounds of this raster and the geometry will be intersected and the result of the intersection will be tiled. Useful in cases where the extent of collected labels and source imagery partially overlap. The src_img must have the same projection units diff --git a/solaris/utils/io.py b/solaris/utils/io.py index df151b2b..571ad8b3 100644 --- a/solaris/utils/io.py +++ b/solaris/utils/io.py @@ -20,7 +20,7 @@ def imread( Arguments --------- - path : str + path : `str`, :class:`pathlib.Path` or :class:`Rasterio.DatasetReader` Path to the image file to load. 
make_8bit : bool, optional Should the image be converted to an 8-bit format? Defaults to False. diff --git a/solaris/vector/mask.py b/solaris/vector/mask.py index 903dbcd8..d16f2a11 100644 --- a/solaris/vector/mask.py +++ b/solaris/vector/mask.py @@ -62,11 +62,11 @@ def df_to_px_mask( maximum spacing between polygons to be labeled. Each channel correspond to its own `shape` plane in the output. - out_file : str, optional + out_file : str or :class:`pathlib.Path`, optional Path to an image file to save the output to. Must be compatible with :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be provided (for metadata purposes). - reference_im : :class:`rasterio.DatasetReader` or `str`, optional + reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional An image to extract necessary coordinate information from: the affine transformation matrix, the image extent, etc. If provided, `affine_obj` and `shape` are ignored. @@ -189,7 +189,7 @@ def footprint_mask( Path to an image file to save the output to. Must be compatible with :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be provided (for metadata purposes). - reference_im : :class:`rasterio.DatasetReader` or `str`, optional + reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional An image to extract necessary coordinate information from: the affine transformation matrix, the image extent, etc. If provided, `affine_obj` and `shape` are ignored. @@ -307,7 +307,7 @@ def boundary_mask( Path to an image file to save the output to. Must be compatible with :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be provided (for metadata purposes). - reference_im : :class:`rasterio.DatasetReader` or `str`, optional + reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional An image to extract necessary coordinate information from: the affine transformation matrix, the image extent, etc. If provided, `affine_obj` and `shape` are ignored @@ -403,7 +403,7 @@ def contact_mask( Path to an image file to save the output to. Must be compatible with :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be provided (for metadata purposes). - reference_im : :class:`rasterio.DatasetReader` or `str`, optional + reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional An image to extract necessary coordinate information from: the affine transformation matrix, the image extent, etc. If provided, `affine_obj` and `shape` are ignored. @@ -547,7 +547,7 @@ def road_mask( the function will attempt to transform to the relevant CRS using ``df.to_crs()`` (if `df` is a :class:`geopandas.GeoDataFrame`) or using the data provided in `reference_im` (if not). - out_file : str, optional + out_file : str or :class:`pathlib.Path`, optional Path to an image file to save the output to. Must be compatible with :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be provided (for metadata purposes). @@ -580,7 +580,7 @@ def road_mask( burn_field : str, optional Name of a column in `df` that provides values for `burn_value` for each independent object. If provided, `burn_value` is ignored. - min_background_val : int + min_background_value : int Minimum value for mask background. Optional, ignore if ``None``. Defaults to ``None``. 
verbose : str, optional @@ -670,7 +670,7 @@ def buffer_df_geoms( meters : bool, optional Should buffers be in pixel units (default) or metric units (if `meters` is ``True``)? - reference_im : `str` or :class:`rasterio.DatasetReader`, optional + reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional The path to a reference image covering the same geographic extent as the area labeled in `df`. Provided for georeferencing of pixel coordinate geometries in `df` or conversion of georeferenced geometries @@ -830,11 +830,11 @@ def mask_to_poly_geojson( If not provided, no scaling will be performend and channels will be summed. - reference_im : str, optional + reference_im : str or :class:`pathlib.Path`, optional The path to a reference geotiff to use for georeferencing the polygons in the mask. Required if saving to a GeoJSON (see the ``output_type`` argument), otherwise only required if ``do_transform=True``. - output_path : str, optional + output_path : str or :class:`pathlib.Path`, optional Path to save the output file to. If not provided, no file is saved. output_type : ``'csv'`` or ``'geojson'``, optional If ``output_path`` is provided, this argument defines what type of file @@ -953,7 +953,7 @@ def instance_mask( with a column containing geometries (identified by `geom_col`). If the geometries in `df` are not in pixel coordinates, then `affine` or `reference_im` must be passed to provide the transformation to convert. - out_file : str, optional + out_file : str or :class:`pathlib.Path`, optional Path to an image file to save the output to. Must be compatible with :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be provided (for metadata purposes). diff --git a/solaris/vector/polygon.py b/solaris/vector/polygon.py index f5452ef0..11039690 100644 --- a/solaris/vector/polygon.py +++ b/solaris/vector/polygon.py @@ -1,5 +1,6 @@ import os import shutil +from pathlib import Path import geopandas as gpd import pandas as pd @@ -25,7 +26,7 @@ def convert_poly_coords( geom : :class:`shapely.geometry.shape` or str A :class:`shapely.geometry.shape`, or WKT string-formatted geometry object currently in pixel coordinates. - raster_src : str, optional + raster_src : str or :class:`pathlib.Path`, optional Path to a raster image with georeferencing data to apply to `geom`. Alternatively, an opened :class:`rasterio.Band` object or :class:`osgeo.gdal.Dataset` object can be provided. Required if not @@ -164,7 +165,7 @@ def georegister_px_df( df : :class:`pandas.DataFrame` A :class:`pandas.DataFrame` with polygons in a column named ``"geometry"``. - im_path : str, optional + im_path : str or :class:`pathlib.Path`, optional A filename or :class:`rasterio.DatasetReader` object containing an image that has the same bounds as the pixel coordinates in `df`. If not provided, `affine_obj` and `crs` must both be provided. @@ -223,11 +224,11 @@ def geojson_to_px_gdf( Arguments --------- - geojson : str + geojson : str or :class:`pathlib.Path` Path to a geojson. This function will also accept a :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame` with a column named ``'geometry'`` in this argument. - im_path : str + im_path : str or :class:`pathlib.Path` Path to a georeferenced image (ie a GeoTIFF) that geolocates to the same geography as the `geojson`(s). 
This function will also accept a :class:`osgeo.gdal.Dataset` or :class:`rasterio.DatasetReader` with @@ -299,7 +300,7 @@ def get_overlapping_subset(gdf, im=None, bbox=None, bbox_crs=None): --------- gdf : :class:`geopandas.GeoDataFrame` A :class:`geopandas.GeoDataFrame` instance or a path to a geojson. - im : :class:`rasterio.DatasetReader` or `str`, optional + im : :class:`rasterio.DatasetReader`, `str` or :class:`pathlib.Path`, optional An image object loaded with `rasterio` or a path to a georeferenced image (i.e. a GeoTIFF). bbox : `list` or :class:`shapely.geometry.Polygon`, optional @@ -369,19 +370,19 @@ def gdf_to_yolo( Arguments --------- - geodataframe : str + geodataframe : str or :class:`pathlib.Path` Path to a :class:`geopandas.GeoDataFrame` with a column named ``'geometry'``. Can be created from a geojson with labels for unique objects. Can be converted to this format with ``geodataframe=gpd.read_file("./xView_30.geojson")``. - im_path : str + image : str or :class:`pathlib.Path` Path to a georeferenced image (ie a GeoTIFF or png created with GDAL) that geolocates to the same geography as the `geojson`(s). If a directory, the bounds of each GeoTIFF will be loaded in and all overlapping geometries will be transformed. This function will also accept a :class:`osgeo.gdal.Dataset` or :class:`rasterio.DatasetReader` with georeferencing information in this argument. - output_dir : str + output_dir : str or :class:`pathlib.Path` Path to an output directory where all of the yolo readable text files will be placed. column : str, optional @@ -419,9 +420,9 @@ def gdf_to_yolo( dw = 1.0 / im_size[0] dh = 1.0 / im_size[1] header = [column, "x", "y", "w", "h"] - if os.path.isdir(output_dir) is False: - os.mkdir(output_dir) - output = os.path.join(output_dir, image.split(".png")[0] + ".txt") + output_dir = Path(output_dir) + output_dir.mkdir(exist_ok=True) + output = output_dir / f"{Path(image).stem}.txt" gdf = geojson_to_px_gdf(geodataframe, image, precision=None) gdf["area"] = gdf["geometry"].area gdf["intersection"] = gdf["geometry"].intersection(pix_poly).area / gdf["area"] From a3ddd718a6f461766183efa831d7d6374580a872 Mon Sep 17 00:00:00 2001 From: remtav Date: Wed, 8 Jun 2022 11:21:00 -0400 Subject: [PATCH 2/8] base.py: - refactor Evaluator's __init__ to better use existing utilities - load_truth() uses dedicated _check_gdf_load() function core.py: _check_gdf_load() returns empty gdf with sindex if error raised --- solaris/eval/base.py | 23 ++++++++++------------- solaris/utils/core.py | 4 +++- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/solaris/eval/base.py b/solaris/eval/base.py index 801bea90..511de1da 100644 --- a/solaris/eval/base.py +++ b/solaris/eval/base.py @@ -6,6 +6,7 @@ import shapely.wkt from fiona._err import CPLE_OpenFailedError from fiona.errors import DriverError +from solaris.utils.core import _check_gdf_load from tqdm.auto import tqdm from . 
import iou @@ -37,11 +38,11 @@ class Evaluator: def __init__(self, ground_truth_vector_file): # Load Ground Truth : Ground Truth should be in geojson or shape file - try: - if str(ground_truth_vector_file).lower().endswith("json"): - self.load_truth(ground_truth_vector_file) - elif str(ground_truth_vector_file).lower().endswith("csv"): - self.load_truth(ground_truth_vector_file, truthCSV=True) + if isinstance(ground_truth_vector_file, (str, Path)): self.ground_truth_fname = str(ground_truth_vector_file) - except AttributeError: # handles passing gdf instead of path to file - self.ground_truth_GDF = ground_truth_vector_file + else: self.ground_truth_fname = "GeoDataFrame variable" + + if isinstance(ground_truth_vector_file, (str, Path)) and str(ground_truth_vector_file).lower().endswith("csv"): + self.load_truth(ground_truth_vector_file, truthCSV=True) + else: + self.load_truth(ground_truth_vector_file) self.ground_truth_sindex = self.ground_truth_GDF.sindex # get sindex # create deep copy of ground truth file for calculations self.ground_truth_GDF_Edit = self.ground_truth_GDF.copy(deep=True) self.proposal_GDF = gpd.GeoDataFrame([]) # initialize proposal GDF def __repr__(self): + return "Evaluator {}".format(os.path.split(self.ground_truth_fname)[-1]) def get_iou_by_building(self): @@ -618,12 +620,7 @@ def load_truth( ], ) else: - try: - self.ground_truth_GDF = gpd.read_file(ground_truth_vector_file) - except (CPLE_OpenFailedError, DriverError): # empty geojson - self.ground_truth_GDF = gpd.GeoDataFrame( - {"sindex": [], "condition": [], "geometry": []} - ) + self.ground_truth_GDF = _check_gdf_load(ground_truth_vector_file) # force calculation of spatialindex self.ground_truth_sindex = self.ground_truth_GDF.sindex # create deep copy of ground truth file for calculations diff --git a/solaris/utils/core.py b/solaris/utils/core.py index 006733ff..633e0aa9 100644 --- a/solaris/utils/core.py +++ b/solaris/utils/core.py @@ -69,7 +69,9 @@ def _check_gdf_load(gdf): " path or it isn't a valid vector file. Returning an empty" " GeoDataFrame." ) - return gpd.GeoDataFrame() + return gpd.GeoDataFrame( + {"sindex": [], "condition": [], "geometry": []} + ) elif isinstance(gdf, gpd.GeoDataFrame): return gdf else: From 1356c8d33a17c6e5a79a73ea9dd13526f10e4d05 Mon Sep 17 00:00:00 2001 From: remtav Date: Wed, 8 Jun 2022 12:02:01 -0400 Subject: [PATCH 3/8] test_core.py: add tests from pathlib.Path objects (necessary?)
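These tests pin down one thing: the private loader helpers in solaris.utils.core accept pathlib.Path objects anywhere they previously required str paths. A rough sketch of the pattern they exercise, reusing the sample files already in the test data directory (the absolute location below is hypothetical; the tests resolve their own data_dir):

from pathlib import Path
from solaris.utils.core import _check_df_load, _check_gdf_load, _check_rasterio_im_load

data_dir = Path("/path/to/solaris/tests/data")  # hypothetical test-data location
gdf = _check_gdf_load(data_dir / "sample.geojson")  # str-only before this series
df = _check_df_load(data_dir / "sample.csv")
im = _check_rasterio_im_load(data_dir / "sample_geotiff.tif")
im.close()  # rasterio dataset readers should be closed when done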
--- tests/test_utils/test_core.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/test_utils/test_core.py b/tests/test_utils/test_core.py index 5356a19a..1de92bb7 100644 --- a/tests/test_utils/test_core.py +++ b/tests/test_utils/test_core.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import geopandas as gpd import numpy as np @@ -26,6 +27,13 @@ def test_unloaded_geojson(self): assert truth_gdf.equals(test_gdf) + def test_unloaded_geojson_from_pathlib(self): + geojson_path = Path(data_dir) / "sample.geojson" + truth_gdf = gpd.read_file(geojson_path) + test_gdf = _check_gdf_load(geojson_path) + + assert truth_gdf.equals(test_gdf) + def test_loaded_geojson(self): geojson_path = os.path.join(data_dir, "sample.geojson") truth_gdf = gpd.read_file(geojson_path) @@ -40,6 +48,13 @@ def test_unloaded_df(self): assert truth_df.equals(test_df) + def test_unloaded_df_from_pathlib(self): + csv_path = Path(data_dir) / "sample.csv" + truth_df = pd.read_csv(csv_path) + test_df = _check_df_load(csv_path) + + assert truth_df.equals(test_df) + def test_loaded_df(self): csv_path = os.path.join(data_dir, "sample.csv") truth_df = pd.read_csv(csv_path) @@ -58,6 +73,17 @@ def test_unloaded_image(self): truth_im.close() # need to close the rasterio datasetreader objects test_im.close() + def test_unloaded_image_from_pathlib(self): + im_path = Path(data_dir) / "sample_geotiff.tif" + truth_im = rasterio.open(im_path) + test_im = _check_rasterio_im_load(im_path) + + assert truth_im.profile == test_im.profile + assert np.array_equal(truth_im.read(1), test_im.read(1)) + + truth_im.close() # need to close the rasterio datasetreader objects + test_im.close() + def test_loaded_image(self): im_path = os.path.join(data_dir, "sample_geotiff.tif") truth_im = rasterio.open(im_path) From aebd3ed8ee268f4ae4fd071cb0546261fd8b09c3 Mon Sep 17 00:00:00 2001 From: Ryan Avery Date: Thu, 30 Jun 2022 11:26:12 -0700 Subject: [PATCH 4/8] Update environment.yaml --- environment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yaml b/environment.yaml index 20b1f0af..fcbaaf59 100644 --- a/environment.yaml +++ b/environment.yaml @@ -9,7 +9,7 @@ dependencies: - geopandas>=0.7.0 - matplotlib>=3.1.2 - numpy>=1.17.3 - - opencv-python>=4.1 + - opencv>=4.1 - pandas>=0.25.3 - pyproj>=2.1 - PyYAML>=5.4 From a8b73a25640466fd372ef65d3391679442917498 Mon Sep 17 00:00:00 2001 From: Ryan Avery Date: Thu, 7 Jul 2022 10:48:15 -0700 Subject: [PATCH 5/8] add pull request trigger attempting to allow first time contributors to run CI on their PRs --- .github/workflows/tests.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3eb64408..e01848fe 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,8 +1,6 @@ name: Test Python package -on: - - push - +on: [push, pull_request] jobs: test: runs-on: ubuntu-latest From dfaa528ca5737ca3f264dff8dd1af82fe4cd45c5 Mon Sep 17 00:00:00 2001 From: remtav Date: Mon, 6 Jun 2022 16:46:37 -0400 Subject: [PATCH 6/8] add support for pathlib.Path object as input to all functions accepting paths as strings --- solaris/data/coco.py | 15 ++++++++------- solaris/eval/base.py | 15 ++++++++------- solaris/eval/pixel.py | 8 ++++---- solaris/eval/vector.py | 37 +++++++++++++++++++------------------ solaris/raster/image.py | 10 ++++++---- solaris/tile/raster_tile.py | 14 +++++++------- solaris/tile/vector_tile.py | 4 ++-- solaris/utils/core.py | 17 
+++++++++-------- solaris/utils/data.py | 6 +++--- solaris/utils/geo.py | 17 +++++++++-------- solaris/utils/io.py | 2 +- solaris/vector/mask.py | 22 +++++++++++----------- solaris/vector/polygon.py | 23 ++++++++++++----------- 13 files changed, 99 insertions(+), 91 deletions(-) diff --git a/solaris/data/coco.py b/solaris/data/coco.py index 098663de..971e5c57 100644 --- a/solaris/data/coco.py +++ b/solaris/data/coco.py @@ -1,6 +1,7 @@ import json import logging import os +from pathlib import Path import geopandas as gpd import numpy as np @@ -47,7 +48,7 @@ def geojson2coco( Arguments --------- - image_src : :class:`str` or :class:`list` or :class:`dict` + image_src : :class:`str` or :class:`pathlib.Path` or :class:`list` or :class:`dict` Source image(s) to use in the dataset. This can be:: 1. a string path to an image, @@ -149,8 +150,8 @@ def geojson2coco( logger.setLevel(_get_logging_level(int(verbose))) logger.debug("Preparing image filename: image ID dict.") # pdb.set_trace() - if isinstance(image_src, str): - if image_src.endswith("json"): + if isinstance(image_src, (str, Path)): + if str(image_src).endswith("json"): logger.debug("COCO json provided. Extracting fname:id dict.") with open(image_src, "r") as f: image_ref = json.load(f) @@ -599,13 +600,13 @@ def _get_fname_list(p, recursive=False, extension=".tif"): """Get a list of filenames from p, which can be a dir, fname, or list.""" if isinstance(p, list): return p - elif isinstance(p, str): - if os.path.isdir(p): + elif isinstance(p, (str, Path)): + if Path(p).is_dir(): return get_files_recursively( p, traverse_subdirs=recursive, extension=extension ) - elif os.path.isfile(p): - return [p] + elif Path(p).is_file(): + return [str(p)] else: raise ValueError("If a string is provided, it must be a valid" " path.") else: diff --git a/solaris/eval/base.py b/solaris/eval/base.py index 53b08908..801bea90 100644 --- a/solaris/eval/base.py +++ b/solaris/eval/base.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import geopandas as gpd import pandas as pd @@ -29,7 +30,7 @@ class Evaluator: Arguments --------- - ground_truth_vector_file : str + ground_truth_vector_file : `str` or :class:`pathlib.Path` Path to .geojson file for ground truth. """ @@ -37,11 +38,11 @@ class Evaluator: def __init__(self, ground_truth_vector_file): # Load Ground Truth : Ground Truth should be in geojson or shape file try: - if ground_truth_vector_file.lower().endswith("json"): + if str(ground_truth_vector_file).lower().endswith("json"): self.load_truth(ground_truth_vector_file) - elif ground_truth_vector_file.lower().endswith("csv"): + elif str(ground_truth_vector_file).lower().endswith("csv"): self.load_truth(ground_truth_vector_file, truthCSV=True) - self.ground_truth_fname = ground_truth_vector_file + self.ground_truth_fname = str(ground_truth_vector_file) except AttributeError: # handles passing gdf instead of path to file self.ground_truth_GDF = ground_truth_vector_file self.ground_truth_fname = "GeoDataFrame variable" @@ -509,7 +510,7 @@ def load_proposal( Arguments --------- - proposal_vector_file : str + proposal_vector_file : `str` or :class:`pathlib.Path` Path to the file containing proposal vector objects. This can be a .geojson or a .csv. 
conf_field_list : list, optional @@ -540,7 +541,7 @@ def load_proposal( """ # Load Proposal if proposal_vector_file is a path to a file - if os.path.isfile(proposal_vector_file): + if Path(proposal_vector_file).is_file(): # if it's a CSV format, first read into a pd df and then convert # to gpd gdf by loading in geometries using shapely if proposalCSV: @@ -588,7 +589,7 @@ def load_truth( Arguments --------- - ground_truth_vector_file : str + ground_truth_vector_file : `str` or :class:`pathlib.Path` Path to the ground truth vector file. Must be either .geojson or .csv format. truthCSV : bool, optional diff --git a/solaris/eval/pixel.py b/solaris/eval/pixel.py index 36a74a08..44261fd5 100644 --- a/solaris/eval/pixel.py +++ b/solaris/eval/pixel.py @@ -84,13 +84,13 @@ def f1( ``1``, values < `prop_threshold` will be set to ``0``. show_plot : bool, optional Switch to plot the outputs. Defaults to ``False``. - im_file : str, optional + im_file : `str` or :class:`pathlib.Path`, optional Image file corresponding to the masks. Ignored if ``show_plot == False``. Defaults to ``''``. show_colorbar : bool, optional Switch to show colorbar. Ignored if ``show_plot == False``. Defaults to ``False``. - plot_file : str, optional + plot_file : `str` or :class:`pathlib.Path`, optional Output file if plotting. Ignored if ``show_plot == False``. Defaults to ``''``. dpi : int, optional @@ -167,7 +167,7 @@ def f1( plt.suptitle(title, fontsize=fontsize) # ground truth - if len(im_file) > 0: + if len(str(im_file)) > 0: # raw image ax1.imshow(cv2.imread(im_file, 1)) # ground truth @@ -211,7 +211,7 @@ def f1( # fig.tight_layout(rect=[0, 0.03, 1, 0.95]) plt.subplots_adjust(top=0.8) - if len(plot_file) > 0: + if len(str(plot_file)) > 0: plt.savefig(plot_file, dpi=dpi) print("Time to create and save F1 plots:", time.time() - t0, "seconds") diff --git a/solaris/eval/vector.py b/solaris/eval/vector.py index da7f9b17..8041951c 100644 --- a/solaris/eval/vector.py +++ b/solaris/eval/vector.py @@ -1,5 +1,6 @@ import glob import os +from pathlib import Path import geopandas as gpd import numpy as np @@ -49,9 +50,9 @@ def get_all_objects( unique classes present in each Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies @@ -71,23 +72,23 @@ def get_all_objects( A union of the prop_objs and gt_objs lists """ objs = [] - os.chdir(proposal_polygons_dir) + os.chdir(str(proposal_polygons_dir)) search = "*" + file_format proposal_geojsons = glob.glob(search) for geojson in tqdm(proposal_geojsons): - ground_truth_poly = os.path.join(gt_polygons_dir, geojson) + ground_truth_poly = Path(gt_polygons_dir) / geojson if os.path.exists(ground_truth_poly): ground_truth_gdf = gpd.read_file(ground_truth_poly) proposal_gdf = gpd.read_file(geojson) for index, row in proposal_gdf.iterrows(): objs.append(row[prediction_cat_attrib]) prop_objs = list(set(objs)) - os.chdir(gt_polygons_dir) + os.chdir(str(gt_polygons_dir)) search = "*" + file_format objs = [] gt_geojsons = glob.glob(search) for geojson in tqdm(gt_geojsons): - proposal_poly = os.path.join(proposal_polygons_dir, geojson) + proposal_poly = Path(proposal_polygons_dir) / geojson if os.path.exists(proposal_poly): proposal_gdf = gpd.read_file(proposal_poly) ground_truth_gdf = 
gpd.read_file(geojson) @@ -114,9 +115,9 @@ def precision_calc( calculate metric for classes that exist in the ground truth. Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies @@ -148,7 +149,7 @@ def precision_calc( All confidences for each object for each class """ ious = [] - os.chdir(proposal_polygons_dir) + os.chdir(str(proposal_polygons_dir)) search = "*" + file_format proposal_geojsons = glob.glob(search) iou_holder = [] @@ -166,7 +167,7 @@ def precision_calc( confidences.append([]) for geojson in tqdm(proposal_geojsons): - ground_truth_poly = os.path.join(gt_polygons_dir, geojson) + ground_truth_poly = Path(gt_polygons_dir) / geojson if os.path.exists(ground_truth_poly): ground_truth_gdf = gpd.read_file(ground_truth_poly) proposal_gdf = gpd.read_file(geojson) @@ -241,9 +242,9 @@ def recall_calc( calculate metric for classes that exist in the ground truth. Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies @@ -270,7 +271,7 @@ def recall_calc( The mean recall score of recall_by_class """ ious = [] - os.chdir(gt_polygons_dir) + os.chdir(str(gt_polygons_dir)) search = "*" + file_format gt_geojsons = glob.glob(search) iou_holder = [] @@ -285,7 +286,7 @@ def recall_calc( for i in range(len(object_subset)): iou_holder.append([]) for geojson in tqdm(gt_geojsons): - proposal_poly = os.path.join(proposal_polygons_dir, geojson) + proposal_poly = Path(proposal_polygons_dir) / geojson if os.path.exists(proposal_poly): proposal_gdf = gpd.read_file(proposal_poly) ground_truth_gdf = gpd.read_file(geojson) @@ -353,9 +354,9 @@ def mF1( only calculate metric for classes that exist in the ground truth. 
Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies @@ -480,9 +481,9 @@ def mAP_score( Arguments --------- - proposal_polygons_dir : str + proposal_polygons_dir : `str` or :class:`pathlib.Path` The path that contains any model proposal polygons - gt_polygons_dir : str + gt_polygons_dir : `str` or :class:`pathlib.Path` The path that contains the ground truth polygons prediction_cat_attrib : str The column or attribute within the predictions that specifies diff --git a/solaris/raster/image.py b/solaris/raster/image.py index cc751dc3..2f9566b5 100644 --- a/solaris/raster/image.py +++ b/solaris/raster/image.py @@ -1,3 +1,5 @@ +from pathlib import Path + import numpy as np import rasterio @@ -9,7 +11,7 @@ def get_geo_transform(raster_src): Arguments --------- - raster_src : str, :class:`rasterio.DatasetReader`, or `osgeo.gdal.Dataset` + raster_src : str, :class:`pathlib.Path`, :class:`rasterio.DatasetReader`, or `osgeo.gdal.Dataset` Path to a raster image with georeferencing data to apply to `geom`. Alternatively, an opened :class:`rasterio.Band` object or :class:`osgeo.gdal.Dataset` object can be provided. Required if not @@ -21,7 +23,7 @@ def get_geo_transform(raster_src): An affine transformation object to the image's location in its CRS. """ - if isinstance(raster_src, str): + if isinstance(raster_src, (str, Path)): affine_obj = rasterio.open(raster_src).transform elif isinstance(raster_src, rasterio.DatasetReader): affine_obj = raster_src.transform @@ -175,7 +177,7 @@ def stitch_images( # --------- # array : :class:`numpy.ndarray` # A numpy array with a the shape: [Channels, X, Y] or [X, Y] -# out_name : str +# out_name : str or :class:`pathlib.Path` # The output name and path for your image # proj : :class:`gdal.projection` # A projection, can be extracted from an image opened with gdal with @@ -200,7 +202,7 @@ def stitch_images( # driver = gdal.GetDriverByName("GTiff") # if len(array.shape) == 2: # array = array[np.newaxis, ...] -# os.makedirs(os.path.dirname(os.path.abspath(out_name)), exist_ok=True) +# Path(out_name).resolve().parent.mkdir(exist_ok=True) # dataset = driver.Create(out_name, array.shape[2], array.shape[1], array.shape[0], out_format) # if verbose is True: # print("Array Shape, should be [Channels, X, Y] or [X,Y]:", array.shape) diff --git a/solaris/tile/raster_tile.py b/solaris/tile/raster_tile.py index ea7a3dd2..a252ad91 100644 --- a/solaris/tile/raster_tile.py +++ b/solaris/tile/raster_tile.py @@ -1,4 +1,5 @@ import os +from pathlib import Path import numpy as np import rasterio @@ -71,8 +72,8 @@ class RasterTiler(object): src_path : `str` The path or URL to the source dataset. Used for calling ``rio_cogeo.cogeo.cog_validate()``. - dest_dir : `str` - The directory to save the output tiles to. If not + dest_dir : `str` or :class:`pathlib.Path` + The directory to save the output tiles to. dest_crs : int The EPSG code for the output images. 
If not provided, outputs will keep the same CRS as the source image when ``Tiler.make_tile_images()`` @@ -129,8 +130,7 @@ def __init__( if verbose: print("Initializing Tiler...") self.dest_dir = dest_dir - if not os.path.exists(self.dest_dir): - os.makedirs(self.dest_dir) + Path(self.dest_dir).mkdir(exist_ok=True) if dest_crs is not None: self.dest_crs = _check_crs(dest_crs) else: @@ -180,7 +180,7 @@ def tile( Arguments --------- - src : :class:`rasterio.io.DatasetReader` or str + src : :class:`rasterio.io.DatasetReader`, str or :class:`pathlib.Path` The source dataset to tile. nodata_threshold : float, optional Nodata percentages greater than this threshold will not be saved as tiles. @@ -297,13 +297,13 @@ def tile_generator( Arguments --------- - src : `str` or :class:`Rasterio.DatasetReader` + src : `str`, :class:`pathlib.Path` or :class:`Rasterio.DatasetReader` The source data to tile from. If this is a "classic" (non-cloud-optimized) GeoTIFF, the whole image will be loaded in; if it's cloud-optimized, only the required portions will be loaded during tiling unless ``force_load_cog=True`` was specified upon initialization. - dest_dir : str, optional + dest_dir : str or :class:`pathlib.Path`, optional The path to the destination directory to output images to. If the path doesn't exist, it will be created. This argument is required if it wasn't provided during initialization. diff --git a/solaris/tile/vector_tile.py b/solaris/tile/vector_tile.py index 8c386b62..db8da8a9 100644 --- a/solaris/tile/vector_tile.py +++ b/solaris/tile/vector_tile.py @@ -59,7 +59,7 @@ def tile( Arguments --------- - src : `str` or :class:`geopandas.GeoDataFrame` + src : `str`, :class:`pathlib.Path` or :class:`geopandas.GeoDataFrame` The source vector data to tile. Must either be a path to a GeoJSON or a :class:`geopandas.GeoDataFrame`. tile_bounds : list @@ -152,7 +152,7 @@ def tile_generator( Arguments --------- - src : `str` or :class:`geopandas.GeoDataFrame` + src : `str`, :class:`pathlib.Path` or :class:`geopandas.GeoDataFrame` The source vector data to tile. Must either be a path to a GeoJSON or a :class:`geopandas.GeoDataFrame`. 
tile_bounds : list diff --git a/solaris/utils/core.py b/solaris/utils/core.py index bd46ccda..006733ff 100644 --- a/solaris/utils/core.py +++ b/solaris/utils/core.py @@ -1,5 +1,6 @@ import os from distutils.version import LooseVersion +from pathlib import Path from warnings import warn import geopandas as gpd @@ -17,7 +18,7 @@ def _check_rasterio_im_load(im): """Check if `im` is already loaded in; if not, load it in.""" - if isinstance(im, str): + if isinstance(im, (str, Path)): return rasterio.open(im) elif isinstance(im, rasterio.DatasetReader): return im @@ -27,7 +28,7 @@ def _check_rasterio_im_load(im): def _check_skimage_im_load(im): """Check if `im` is already loaded in; if not, load it in.""" - if isinstance(im, str): + if isinstance(im, (str, Path)): return skimage.io.imread(im) elif isinstance(im, np.ndarray): return im @@ -39,8 +40,8 @@ def _check_skimage_im_load(im): def _check_df_load(df): """Check if `df` is already loaded in, if not, load from file.""" - if isinstance(df, str): - if df.lower().endswith("json"): + if isinstance(df, (str, Path)): + if str(df).lower().endswith("json"): return _check_gdf_load(df) else: return pd.read_csv(df) @@ -52,11 +53,11 @@ def _check_df_load(df): def _check_gdf_load(gdf): """Check if `gdf` is already loaded in, if not, load from geojson.""" - if isinstance(gdf, str): + if isinstance(gdf, (str, Path)): # as of geopandas 0.6.2, using the OGR CSV driver requires some add'nal # kwargs to create a valid geodataframe with a geometry column. see # https://github.com/geopandas/geopandas/issues/1234 - if gdf.lower().endswith("csv"): + if str(gdf).lower().endswith("csv"): return gpd.read_file( gdf, GEOM_POSSIBLE_NAMES="geometry", KEEP_GEOM_COLUMNS="NO" ) @@ -117,7 +118,7 @@ def get_data_paths(path, infer=False): Arguments --------- - path : str + path : str or :class:`pathlib.Path Path to a .CSV-formatted reference file defining the location of training, validation, or inference data. See docs for details. infer : bool, optional @@ -142,7 +143,7 @@ def get_data_paths(path, infer=False): def get_files_recursively(path, traverse_subdirs=False, extension=".tif"): """Get files from subdirs of `path`, joining them to the dir.""" if traverse_subdirs: - walker = os.walk(path) + walker = os.walk(str(path)) path_list = [] for step in walker: if not step[2]: # if there are no files in the current dir diff --git a/solaris/utils/data.py b/solaris/utils/data.py index 966cea90..bde24c62 100644 --- a/solaris/utils/data.py +++ b/solaris/utils/data.py @@ -33,19 +33,19 @@ def make_dataset_csv( Arguments --------- - im_dir : str + im_dir : str or :class:`pathlib.Path` The path to the directory containing images to be used by your model. Images in sub-directories can be included by setting ``recursive=True``. im_ext : str, optional The file extension used by your images. Defaults to ``"tif"``. Not case sensitive. - label_dir : str, optional + label_dir : str or :class:`pathlib.Path`, optional The path to the directory containing images to be used by your model. Images in sub-directories can be included by setting ``recursive=True``. This argument is required if `stage` is ``"train"`` (default) or ``"val"``, but has no effect if `stage` is ``"infer"``. - output_path : str, optional + output_path : str or :class:`pathlib.Path`, optional The path to save the generated CSV to. Defaults to ``"dataset.csv"``. stage : str, optional The stage that the csv is generated for. 
Can be ``"train"`` (default), diff --git a/solaris/utils/geo.py b/solaris/utils/geo.py index 7167408f..4aa0e293 100644 --- a/solaris/utils/geo.py +++ b/solaris/utils/geo.py @@ -1,6 +1,7 @@ import json import os import sys +from pathlib import Path from warnings import warn import geopandas as gpd @@ -52,7 +53,7 @@ def reproject( Arguments --------- - input_object : `str` or :class:`rasterio.DatasetReader` or :class:`geopandas.GeoDataFrame` + input_object : `str`, :class:`pathlib.Path`, :class:`Rasterio.DatasetReader` or :class:`geopandas.GeoDataFrame` An object to transform to a new CRS. If a string, it must be a path to a georegistered image or vector dataset (e.g. a .GeoJSON). If the object itself does not contain georeferencing information, the @@ -73,7 +74,7 @@ def reproject( `target_crs` is provided, the input will be projected into the appropriate UTM zone. `target_crs` takes precedence if both it and `target_object` are provided. - dest_path : str, optional + dest_path : `str` or :class:`pathlib.Path`, optional The path to save the output to (if desired). resampling_method : str, optional The resampling method to use during reprojection of raster data. **Only @@ -261,11 +262,11 @@ def get_crs(obj): def _parse_geo_data(input): - if isinstance(input, str): - if input.lower().endswith("json") or input.lower().endswith("csv"): + if isinstance(input, (str, Path)): + if str(input).lower().endswith("json") or str(input).lower().endswith("csv"): input_type = "vector" input_data = _check_df_load(input) - elif input.lower().endswith("tif") or input.lower().endswith("tiff"): + elif str(input).lower().endswith("tif") or str(input).lower().endswith("tiff"): input_type = "raster" input_data = _check_rasterio_im_load(input) else: @@ -299,7 +300,7 @@ def reproject_geometry(input_geom, input_crs=None, target_crs=None, affine_obj=N The target coordinate reference system to re-project the geometry into. If not provided, the appropriate UTM zone will be selected by default, unless `affine_transform` is provided (and therefore CRSs are ignored.) - affine_transform : :class:`affine.Affine`, optional + affine_obj : :class:`affine.Affine`, optional An :class:`affine.Affine` object (or a ``[a, b, c, d, e, f]`` list to convert to that format) to use for transformation. Has no effect unless `input_crs` **and** `target_crs` are not provided. @@ -372,7 +373,7 @@ def raster_get_projection_unit(image): Arguments --------- - image : raster image, GeoTIFF or other format + image : `str`, :class:`pathlib.Path` or :class:`Rasterio.DatasetReader` A raster file with georeferencing Notes @@ -807,7 +808,7 @@ def split_geom( resolution: `tuple` of `float`s, optional (x resolution, y resolution). Used by default if use_metric_size is False. Can be acquired from rasterio dataset object's metadata. - src_img: `str` or `raster`, optional + src_img: `str`, :class:`pathlib.Path` or :class:`Rasterio.DatasetReader`, optional A rasterio raster object or path to a geotiff. The bounds of this raster and the geometry will be intersected and the result of the intersection will be tiled. Useful in cases where the extent of collected labels and source imagery partially overlap. The src_img must have the same projection units diff --git a/solaris/utils/io.py b/solaris/utils/io.py index df151b2b..571ad8b3 100644 --- a/solaris/utils/io.py +++ b/solaris/utils/io.py @@ -20,7 +20,7 @@ def imread( Arguments --------- - path : str + path : `str`, :class:`pathlib.Path` or :class:`Rasterio.DatasetReader` Path to the image file to load. 
     make_8bit : bool, optional
         Should the image be converted to an 8-bit format? Defaults to False.
diff --git a/solaris/vector/mask.py b/solaris/vector/mask.py
index 903dbcd8..d16f2a11 100644
--- a/solaris/vector/mask.py
+++ b/solaris/vector/mask.py
@@ -62,11 +62,11 @@ def df_to_px_mask(
         maximum spacing between polygons to be labeled. Each channel correspond
         to its own `shape` plane in the output.
-    out_file : str, optional
+    out_file : str or :class:`pathlib.Path`, optional
         Path to an image file to save the output to. Must be compatible with
         :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
         provided (for metadata purposes).
-    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
+    reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional
         An image to extract necessary coordinate information from: the
         affine transformation matrix, the image extent, etc. If provided,
         `affine_obj` and `shape` are ignored.
@@ -189,7 +189,7 @@ def footprint_mask(
         Path to an image file to save the output to. Must be compatible with
         :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
         provided (for metadata purposes).
-    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
+    reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional
         An image to extract necessary coordinate information from: the
         affine transformation matrix, the image extent, etc. If provided,
         `affine_obj` and `shape` are ignored.
@@ -307,7 +307,7 @@ def boundary_mask(
         Path to an image file to save the output to. Must be compatible with
         :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
         provided (for metadata purposes).
-    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
+    reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional
         An image to extract necessary coordinate information from: the
         affine transformation matrix, the image extent, etc. If provided,
         `affine_obj` and `shape` are ignored
@@ -403,7 +403,7 @@ def contact_mask(
         Path to an image file to save the output to. Must be compatible with
         :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
         provided (for metadata purposes).
-    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
+    reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional
         An image to extract necessary coordinate information from: the
         affine transformation matrix, the image extent, etc. If provided,
         `affine_obj` and `shape` are ignored.
@@ -547,7 +547,7 @@ def road_mask(
         the function will attempt to transform to the relevant CRS using
         ``df.to_crs()`` (if `df` is a :class:`geopandas.GeoDataFrame`) or
         using the data provided in `reference_im` (if not).
-    out_file : str, optional
+    out_file : str or :class:`pathlib.Path`, optional
         Path to an image file to save the output to. Must be compatible with
         :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
         provided (for metadata purposes).
@@ -580,7 +580,7 @@ def road_mask(
     burn_field : str, optional
         Name of a column in `df` that provides values for `burn_value` for
         each independent object. If provided, `burn_value` is ignored.
-    min_background_val : int
+    min_background_value : int
         Minimum value for mask background. Optional, ignore if ``None``.
         Defaults to ``None``.
     verbose : str, optional
@@ -670,7 +670,7 @@ def buffer_df_geoms(
     meters : bool, optional
         Should buffers be in pixel units (default) or metric units
         (if `meters` is ``True``)?
-    reference_im : `str` or :class:`rasterio.DatasetReader`, optional
+    reference_im : `str`, :class:`pathlib.Path` or :class:`rasterio.DatasetReader`, optional
         The path to a reference image covering the same geographic extent
         as the area labeled in `df`. Provided for georeferencing of pixel
         coordinate geometries in `df` or conversion of georeferenced geometries
@@ -830,11 +830,11 @@ def mask_to_poly_geojson(
         If not provided, no scaling will be performend and channels will be
         summed.
-    reference_im : str, optional
+    reference_im : str or :class:`pathlib.Path`, optional
         The path to a reference geotiff to use for georeferencing the polygons
         in the mask. Required if saving to a GeoJSON (see the ``output_type``
         argument), otherwise only required if ``do_transform=True``.
-    output_path : str, optional
+    output_path : str or :class:`pathlib.Path`, optional
         Path to save the output file to. If not provided, no file is saved.
     output_type : ``'csv'`` or ``'geojson'``, optional
         If ``output_path`` is provided, this argument defines what type of file
@@ -953,7 +953,7 @@ def instance_mask(
         with a column containing geometries (identified by `geom_col`). If the
         geometries in `df` are not in pixel coordinates, then `affine` or
         `reference_im` must be passed to provide the transformation to convert.
-    out_file : str, optional
+    out_file : str or :class:`pathlib.Path`, optional
         Path to an image file to save the output to. Must be compatible with
         :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
         provided (for metadata purposes).
diff --git a/solaris/vector/polygon.py b/solaris/vector/polygon.py
index f5452ef0..11039690 100644
--- a/solaris/vector/polygon.py
+++ b/solaris/vector/polygon.py
@@ -1,5 +1,6 @@
 import os
 import shutil
+from pathlib import Path
 
 import geopandas as gpd
 import pandas as pd
@@ -25,7 +26,7 @@ def convert_poly_coords(
     geom : :class:`shapely.geometry.shape` or str
         A :class:`shapely.geometry.shape`, or WKT string-formatted geometry
         object currently in pixel coordinates.
-    raster_src : str, optional
+    raster_src : str or :class:`pathlib.Path`, optional
         Path to a raster image with georeferencing data to apply to `geom`.
         Alternatively, an opened :class:`rasterio.Band` object or
         :class:`osgeo.gdal.Dataset` object can be provided. Required if not
@@ -164,7 +165,7 @@ def georegister_px_df(
     df : :class:`pandas.DataFrame`
         A :class:`pandas.DataFrame` with polygons in a column named
         ``"geometry"``.
-    im_path : str, optional
+    im_path : str or :class:`pathlib.Path`, optional
         A filename or :class:`rasterio.DatasetReader` object containing an
         image that has the same bounds as the pixel coordinates in `df`. If
         not provided, `affine_obj` and `crs` must both be provided.
@@ -223,11 +224,11 @@ def geojson_to_px_gdf(
 
     Arguments
     ---------
-    geojson : str
+    geojson : str or :class:`pathlib.Path`
        Path to a geojson. This function will also accept a
        :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame` with a
        column named ``'geometry'`` in this argument.
-    im_path : str
+    im_path : str or :class:`pathlib.Path`
        Path to a georeferenced image (ie a GeoTIFF) that geolocates to the
        same geography as the `geojson`(s). This function will also accept a
        :class:`osgeo.gdal.Dataset` or :class:`rasterio.DatasetReader` with
@@ -299,7 +300,7 @@ def get_overlapping_subset(gdf, im=None, bbox=None, bbox_crs=None):
     ---------
     gdf : :class:`geopandas.GeoDataFrame`
         A :class:`geopandas.GeoDataFrame` instance or a path to a geojson.
-    im : :class:`rasterio.DatasetReader` or `str`, optional
+    im : :class:`rasterio.DatasetReader`, `str` or :class:`pathlib.Path`, optional
         An image object loaded with `rasterio` or a path to a georeferenced
         image (i.e. a GeoTIFF).
     bbox : `list` or :class:`shapely.geometry.Polygon`, optional
@@ -369,19 +370,19 @@ def gdf_to_yolo(
 
     Arguments
     ---------
-    geodataframe : str
+    geodataframe : str or :class:`pathlib.Path`
         Path to a :class:`geopandas.GeoDataFrame` with a column named
         ``'geometry'``.  Can be created from a geojson with labels for unique
         objects.  Can be converted to this format with
         ``geodataframe=gpd.read_file("./xView_30.geojson")``.
-    im_path : str
+    image : str or :class:`pathlib.Path`
         Path to a georeferenced image (ie a GeoTIFF or png created with
         GDAL) that geolocates to the same geography as the `geojson`(s).
         If a directory, the bounds of each GeoTIFF will be loaded in and
         all overlapping geometries will be transformed. This function will
         also accept a :class:`osgeo.gdal.Dataset` or :class:`rasterio.DatasetReader`
         with georeferencing information in this argument.
-    output_dir : str
+    output_dir : str or :class:`pathlib.Path`
         Path to an output directory where all of the yolo readable text files
         will be placed.
     column : str, optional
@@ -419,9 +420,9 @@ def gdf_to_yolo(
     dw = 1.0 / im_size[0]
     dh = 1.0 / im_size[1]
     header = [column, "x", "y", "w", "h"]
-    if os.path.isdir(output_dir) is False:
-        os.mkdir(output_dir)
-    output = os.path.join(output_dir, image.split(".png")[0] + ".txt")
+    output_dir = Path(output_dir)
+    output_dir.mkdir(exist_ok=True)
+    output = output_dir / f"{Path(image).stem}.txt"
     gdf = geojson_to_px_gdf(geodataframe, image, precision=None)
     gdf["area"] = gdf["geometry"].area
     gdf["intersection"] = gdf["geometry"].intersection(pix_poly).area / gdf["area"]

From 31c5b7e28bfbdceaa28ded22d0c434482b4107c2 Mon Sep 17 00:00:00 2001
From: remtav
Date: Wed, 8 Jun 2022 11:21:00 -0400
Subject: [PATCH 7/8] base.py:
 - refactor Evaluator's __init__ to better use existing utilities
 - load_truth() uses dedicated _check_gdf_load() function
 core.py: _check_gdf_load() returns empty gdf with sindex if error raised

---
 solaris/eval/base.py  | 23 ++++++++++-------------
 solaris/utils/core.py |  4 +++-
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/solaris/eval/base.py b/solaris/eval/base.py
index 801bea90..511de1da 100644
--- a/solaris/eval/base.py
+++ b/solaris/eval/base.py
@@ -6,6 +6,7 @@
 import shapely.wkt
 from fiona._err import CPLE_OpenFailedError
 from fiona.errors import DriverError
+from solaris.utils.core import _check_gdf_load
 from tqdm.auto import tqdm
 
 from . import iou
@@ -37,21 +38,22 @@ class Evaluator:
 
     def __init__(self, ground_truth_vector_file):
         # Load Ground Truth : Ground Truth should be in geojson or shape file
-        try:
-            if str(ground_truth_vector_file).lower().endswith("json"):
-                self.load_truth(ground_truth_vector_file)
-            elif str(ground_truth_vector_file).lower().endswith("csv"):
-                self.load_truth(ground_truth_vector_file, truthCSV=True)
+        if isinstance(ground_truth_vector_file, (str, Path)):
             self.ground_truth_fname = str(ground_truth_vector_file)
-        except AttributeError:  # handles passing gdf instead of path to file
-            self.ground_truth_GDF = ground_truth_vector_file
+        else:
             self.ground_truth_fname = "GeoDataFrame variable"
+
+        if isinstance(ground_truth_vector_file, (str, Path)) and str(ground_truth_vector_file).lower().endswith("csv"):
+            self.load_truth(ground_truth_vector_file, truthCSV=True)
+        else:
+            self.load_truth(ground_truth_vector_file)
         self.ground_truth_sindex = self.ground_truth_GDF.sindex  # get sindex
         # create deep copy of ground truth file for calculations
         self.ground_truth_GDF_Edit = self.ground_truth_GDF.copy(deep=True)
         self.proposal_GDF = gpd.GeoDataFrame([])  # initialize proposal GDF
 
     def __repr__(self):
+
         return "Evaluator {}".format(os.path.split(self.ground_truth_fname)[-1])
 
     def get_iou_by_building(self):
@@ -618,12 +620,7 @@ def load_truth(
                 ],
             )
         else:
-            try:
-                self.ground_truth_GDF = gpd.read_file(ground_truth_vector_file)
-            except (CPLE_OpenFailedError, DriverError):  # empty geojson
-                self.ground_truth_GDF = gpd.GeoDataFrame(
-                    {"sindex": [], "condition": [], "geometry": []}
-                )
+            self.ground_truth_GDF = _check_gdf_load(ground_truth_vector_file)
         # force calculation of spatialindex
         self.ground_truth_sindex = self.ground_truth_GDF.sindex
         # create deep copy of ground truth file for calculations
diff --git a/solaris/utils/core.py b/solaris/utils/core.py
index 006733ff..633e0aa9 100644
--- a/solaris/utils/core.py
+++ b/solaris/utils/core.py
@@ -69,7 +69,9 @@ def _check_gdf_load(gdf):
                 " path or it isn't a valid vector file. Returning an empty"
                 " GeoDataFrame."
             )
-            return gpd.GeoDataFrame()
+            return gpd.GeoDataFrame(
+                {"sindex": [], "condition": [], "geometry": []}
+            )
     elif isinstance(gdf, gpd.GeoDataFrame):
         return gdf
     else:

From 1d0f6f727cd2b97c3b1f276af7f1625096750f9b Mon Sep 17 00:00:00 2001
From: remtav
Date: Wed, 8 Jun 2022 12:02:01 -0400
Subject: [PATCH 8/8] test_core.py: add tests for pathlib.Path objects
 (necessary?)
---
 tests/test_utils/test_core.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tests/test_utils/test_core.py b/tests/test_utils/test_core.py
index 5356a19a..1de92bb7 100644
--- a/tests/test_utils/test_core.py
+++ b/tests/test_utils/test_core.py
@@ -1,4 +1,5 @@
 import os
+from pathlib import Path
 
 import geopandas as gpd
 import numpy as np
@@ -26,6 +27,13 @@ def test_unloaded_geojson(self):
 
         assert truth_gdf.equals(test_gdf)
 
+    def test_unloaded_geojson_from_pathlib(self):
+        geojson_path = Path(data_dir) / "sample.geojson"
+        truth_gdf = gpd.read_file(geojson_path)
+        test_gdf = _check_gdf_load(geojson_path)
+
+        assert truth_gdf.equals(test_gdf)
+
     def test_loaded_geojson(self):
         geojson_path = os.path.join(data_dir, "sample.geojson")
         truth_gdf = gpd.read_file(geojson_path)
@@ -40,6 +48,13 @@ def test_unloaded_df(self):
 
         assert truth_df.equals(test_df)
 
+    def test_unloaded_df_from_pathlib(self):
+        csv_path = Path(data_dir) / "sample.csv"
+        truth_df = pd.read_csv(csv_path)
+        test_df = _check_df_load(csv_path)
+
+        assert truth_df.equals(test_df)
+
     def test_loaded_df(self):
         csv_path = os.path.join(data_dir, "sample.csv")
         truth_df = pd.read_csv(csv_path)
@@ -58,6 +73,17 @@ def test_unloaded_image(self):
         assert truth_im.profile == test_im.profile
         assert np.array_equal(truth_im.read(1), test_im.read(1))
 
         truth_im.close()  # need to close the rasterio datasetreader objects
         test_im.close()
 
+    def test_unloaded_image_from_pathlib(self):
+        im_path = Path(data_dir) / "sample_geotiff.tif"
+        truth_im = rasterio.open(im_path)
+        test_im = _check_rasterio_im_load(im_path)
+
+        assert truth_im.profile == test_im.profile
+        assert np.array_equal(truth_im.read(1), test_im.read(1))
+
+        truth_im.close()  # need to close the rasterio datasetreader objects
+        test_im.close()
+
     def test_loaded_image(self):
         im_path = os.path.join(data_dir, "sample_geotiff.tif")
         truth_im = rasterio.open(im_path)
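
For reference, a minimal usage sketch of the behaviour this patch series enables (not part of the patches themselves): after these commits, helpers that previously required string paths should also accept pathlib.Path objects. The `data_dir` location below is a placeholder for wherever the sample files referenced by the tests live.

    from pathlib import Path

    from solaris.eval.base import Evaluator
    from solaris.utils.core import _check_gdf_load, _check_rasterio_im_load

    data_dir = Path("tests/data")  # placeholder: directory holding the sample files

    # pathlib.Path objects are now accepted anywhere a string path was accepted
    gdf = _check_gdf_load(data_dir / "sample.geojson")
    im = _check_rasterio_im_load(data_dir / "sample_geotiff.tif")
    im.close()  # rasterio DatasetReader objects should be closed after use

    # After patch 7/8, Evaluator can likewise be constructed directly from a Path
    evaluator = Evaluator(ground_truth_vector_file=data_dir / "sample.geojson")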