Add tests that we can handle pathlib.Path. (#235)
* Add tests that we can handle pathlib.Path.

* Require hipscat version
delucchi-cmu authored Feb 26, 2024
1 parent 5467c0b commit b571ef8
Showing 8 changed files with 20 additions and 18 deletions.
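For context, a minimal sketch of the usage pattern this commit tests: every path-like argument of ImportArguments is given as a pathlib.Path rather than a plain string. The directory names below are placeholders, and the import paths follow those used in the test module; this is an illustrative sketch, not part of the commit.

from pathlib import Path

from hipscat_import.catalog.arguments import ImportArguments
from hipscat_import.catalog.file_readers import get_file_reader

# Placeholder locations -- substitute real input/output directories.
data_dir = Path("/data/mixed_schema_csv")
tmp_path = Path("/tmp/hipscat_import")

# Every path argument is a pathlib.Path, mirroring the round-trip test below.
args = ImportArguments(
    output_artifact_name="mixed_csv",
    input_file_list=[
        data_dir / "input_01.csv",
        data_dir / "input_02.csv",
    ],
    output_path=tmp_path,
    dask_tmp=tmp_path,
    highest_healpix_order=1,
    file_reader=get_file_reader("csv", chunksize=1),
    progress_bar=False,
)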
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -19,7 +19,7 @@ dependencies = [
 "dask[distributed]",
 "deprecated",
 "healpy",
-"hipscat >= 0.2.4",
+"hipscat >= 0.2.6",
 "ipykernel", # Support for Jupyter notebooks
 "pandas < 2.1.0",
 "pyarrow",
2 changes: 1 addition & 1 deletion src/hipscat_import/catalog/arguments.py
@@ -136,7 +136,7 @@ def additional_runtime_provenance_info(self) -> dict:
 "catalog_name": self.output_artifact_name,
 "epoch": self.epoch,
 "catalog_type": self.catalog_type,
-"input_path": str(self.input_path),
+"input_path": self.input_path,
 "input_paths": self.input_paths,
 "input_file_list": self.input_file_list,
 "ra_column": self.ra_column,
6 changes: 3 additions & 3 deletions src/hipscat_import/index/arguments.py
@@ -67,17 +67,17 @@ def to_catalog_info(self, total_rows) -> IndexCatalogInfo:
 "catalog_name": self.output_artifact_name,
 "total_rows": total_rows,
 "catalog_type": "index",
-"primary_catalog": str(self.input_catalog_path),
+"primary_catalog": self.input_catalog_path,
 "indexing_column": self.indexing_column,
 "extra_columns": self.extra_columns,
 }
 return IndexCatalogInfo(**info)

 def additional_runtime_provenance_info(self) -> dict:
 return {
-"input_catalog_path": str(self.input_catalog_path),
+"input_catalog_path": self.input_catalog_path,
 "indexing_column": self.indexing_column,
 "extra_columns": self.extra_columns,
-"include_hipscat_index": str(self.include_hipscat_index),
+"include_hipscat_index": self.include_hipscat_index,
 "include_order_pixel": self.include_order_pixel,
 }
2 changes: 1 addition & 1 deletion src/hipscat_import/margin_cache/margin_cache_arguments.py
@@ -71,7 +71,7 @@ def to_catalog_info(self, total_rows) -> MarginCacheCatalogInfo:

 def additional_runtime_provenance_info(self) -> dict:
 return {
-"input_catalog_path": str(self.input_catalog_path),
+"input_catalog_path": self.input_catalog_path,
 "margin_threshold": self.margin_threshold,
 "margin_order": self.margin_order,
 }
8 changes: 4 additions & 4 deletions src/hipscat_import/runtime_arguments.py
@@ -107,15 +107,15 @@ def provenance_info(self) -> dict:
 """
 runtime_args = {
 "catalog_name": self.output_artifact_name,
-"output_path": str(self.output_path),
+"output_path": self.output_path,
 "output_artifact_name": self.output_artifact_name,
-"tmp_dir": str(self.tmp_dir),
+"tmp_dir": self.tmp_dir,
 "overwrite": self.overwrite,
-"dask_tmp": str(self.dask_tmp),
+"dask_tmp": self.dask_tmp,
 "dask_n_workers": self.dask_n_workers,
 "dask_threads_per_worker": self.dask_threads_per_worker,
 "catalog_path": self.catalog_path,
-"tmp_path": str(self.tmp_path),
+"tmp_path": self.tmp_path,
 }

 runtime_args.update(self.additional_runtime_provenance_info())
4 changes: 2 additions & 2 deletions src/hipscat_import/soap/arguments.py
@@ -58,9 +58,9 @@ def to_catalog_info(self, total_rows) -> AssociationCatalogInfo:
 "catalog_type": CatalogType.ASSOCIATION,
 "total_rows": total_rows,
 "primary_column": self.object_id_column,
-"primary_catalog": str(self.object_catalog_dir),
+"primary_catalog": self.object_catalog_dir,
 "join_column": self.source_object_id_column,
-"join_catalog": str(self.source_catalog_dir),
+"join_catalog": self.source_catalog_dir,
 "contains_leaf_files": self.write_leaf_files,
 }
 return AssociationCatalogInfo(**info)
2 changes: 1 addition & 1 deletion src/hipscat_import/verification/arguments.py
@@ -42,6 +42,6 @@ def _check_arguments(self):
 def additional_runtime_provenance_info(self) -> dict:
 return {
 "pipeline": "verification pipeline",
-"input_catalog_path": str(self.input_catalog_path),
+"input_catalog_path": self.input_catalog_path,
 "field_distribution_cols": self.field_distribution_cols,
 }
12 changes: 7 additions & 5 deletions tests/hipscat_import/catalog/test_run_round_trip.py
@@ -6,6 +6,7 @@

 import glob
 import os
+from pathlib import Path

 import numpy as np
 import numpy.testing as npt
@@ -68,17 +69,18 @@ def test_import_mixed_schema_csv(
 - the two input files in `mixed_schema_csv_dir` have different *implied* schemas
 when parsed by pandas. this verifies that they end up with the same schema
 and can be combined into a single parquet file.
+- this additionally uses pathlib.Path for all path inputs.
 """
 args = ImportArguments(
 output_artifact_name="mixed_csv_bad",
 input_file_list=[
-os.path.join(mixed_schema_csv_dir, "input_01.csv"),
-os.path.join(mixed_schema_csv_dir, "input_02.csv"),
+Path(mixed_schema_csv_dir) / "input_01.csv",
+Path(mixed_schema_csv_dir) / "input_02.csv",
 ],
-output_path=tmp_path,
-dask_tmp=tmp_path,
+output_path=Path(tmp_path),
+dask_tmp=Path(tmp_path),
 highest_healpix_order=1,
-file_reader=get_file_reader("csv", chunksize=1, schema_file=mixed_schema_csv_parquet),
+file_reader=get_file_reader("csv", chunksize=1, schema_file=Path(mixed_schema_csv_parquet)),
 progress_bar=False,
 )
