Add tests that we can handle pathlib.Path. (#235)
* Add tests that we can handle pathlib.Path.

* Require hipscat version
delucchi-cmu authored Feb 26, 2024
1 parent 5467c0b commit b571ef8
Showing 8 changed files with 20 additions and 18 deletions.
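For context, a minimal sketch of the usage pattern this commit tests: every path-like argument of ImportArguments is given as a pathlib.Path rather than a plain string. The directory names below are placeholders, and the import paths follow those used in the test module; this is an illustrative sketch, not part of the commit.

from pathlib import Path

from hipscat_import.catalog.arguments import ImportArguments
from hipscat_import.catalog.file_readers import get_file_reader

# Placeholder locations -- substitute real input/output directories.
data_dir = Path("/data/mixed_schema_csv")
tmp_path = Path("/tmp/hipscat_import")

# Every path argument is a pathlib.Path, mirroring the round-trip test below.
args = ImportArguments(
    output_artifact_name="mixed_csv",
    input_file_list=[
        data_dir / "input_01.csv",
        data_dir / "input_02.csv",
    ],
    output_path=tmp_path,
    dask_tmp=tmp_path,
    highest_healpix_order=1,
    file_reader=get_file_reader("csv", chunksize=1),
    progress_bar=False,
)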
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -19,7 +19,7 @@ dependencies = [
 "dask[distributed]",
 "deprecated",
 "healpy",
-"hipscat >= 0.2.4",
+"hipscat >= 0.2.6",
 "ipykernel", # Support for Jupyter notebooks
 "pandas < 2.1.0",
 "pyarrow",
2 changes: 1 addition & 1 deletion src/hipscat_import/catalog/arguments.py
@@ -136,7 +136,7 @@ def additional_runtime_provenance_info(self) -> dict:
 "catalog_name": self.output_artifact_name,
 "epoch": self.epoch,
 "catalog_type": self.catalog_type,
-"input_path": str(self.input_path),
+"input_path": self.input_path,
 "input_paths": self.input_paths,
 "input_file_list": self.input_file_list,
 "ra_column": self.ra_column,
6 changes: 3 additions & 3 deletions src/hipscat_import/index/arguments.py
@@ -67,17 +67,17 @@ def to_catalog_info(self, total_rows) -> IndexCatalogInfo:
 "catalog_name": self.output_artifact_name,
 "total_rows": total_rows,
 "catalog_type": "index",
-"primary_catalog": str(self.input_catalog_path),
+"primary_catalog": self.input_catalog_path,
 "indexing_column": self.indexing_column,
 "extra_columns": self.extra_columns,
 }
 return IndexCatalogInfo(**info)

 def additional_runtime_provenance_info(self) -> dict:
 return {
-"input_catalog_path": str(self.input_catalog_path),
+"input_catalog_path": self.input_catalog_path,
 "indexing_column": self.indexing_column,
 "extra_columns": self.extra_columns,
-"include_hipscat_index": str(self.include_hipscat_index),
+"include_hipscat_index": self.include_hipscat_index,
 "include_order_pixel": self.include_order_pixel,
 }
2 changes: 1 addition & 1 deletion src/hipscat_import/margin_cache/margin_cache_arguments.py
@@ -71,7 +71,7 @@ def to_catalog_info(self, total_rows) -> MarginCacheCatalogInfo:

 def additional_runtime_provenance_info(self) -> dict:
 return {
-"input_catalog_path": str(self.input_catalog_path),
+"input_catalog_path": self.input_catalog_path,
 "margin_threshold": self.margin_threshold,
 "margin_order": self.margin_order,
 }
8 changes: 4 additions & 4 deletions src/hipscat_import/runtime_arguments.py
@@ -107,15 +107,15 @@ def provenance_info(self) -> dict:
 """
 runtime_args = {
 "catalog_name": self.output_artifact_name,
-"output_path": str(self.output_path),
+"output_path": self.output_path,
 "output_artifact_name": self.output_artifact_name,
-"tmp_dir": str(self.tmp_dir),
+"tmp_dir": self.tmp_dir,
 "overwrite": self.overwrite,
-"dask_tmp": str(self.dask_tmp),
+"dask_tmp": self.dask_tmp,
 "dask_n_workers": self.dask_n_workers,
 "dask_threads_per_worker": self.dask_threads_per_worker,
 "catalog_path": self.catalog_path,
-"tmp_path": str(self.tmp_path),
+"tmp_path": self.tmp_path,
 }

 runtime_args.update(self.additional_runtime_provenance_info())
4 changes: 2 additions & 2 deletions src/hipscat_import/soap/arguments.py
@@ -58,9 +58,9 @@ def to_catalog_info(self, total_rows) -> AssociationCatalogInfo:
 "catalog_type": CatalogType.ASSOCIATION,
 "total_rows": total_rows,
 "primary_column": self.object_id_column,
-"primary_catalog": str(self.object_catalog_dir),
+"primary_catalog": self.object_catalog_dir,
 "join_column": self.source_object_id_column,
-"join_catalog": str(self.source_catalog_dir),
+"join_catalog": self.source_catalog_dir,
 "contains_leaf_files": self.write_leaf_files,
 }
 return AssociationCatalogInfo(**info)
2 changes: 1 addition & 1 deletion src/hipscat_import/verification/arguments.py
@@ -42,6 +42,6 @@ def _check_arguments(self):
 def additional_runtime_provenance_info(self) -> dict:
 return {
 "pipeline": "verification pipeline",
-"input_catalog_path": str(self.input_catalog_path),
+"input_catalog_path": self.input_catalog_path,
 "field_distribution_cols": self.field_distribution_cols,
 }
12 changes: 7 additions & 5 deletions tests/hipscat_import/catalog/test_run_round_trip.py
@@ -6,6 +6,7 @@

 import glob
 import os
+from pathlib import Path

 import numpy as np
 import numpy.testing as npt
@@ -68,17 +69,18 @@ def test_import_mixed_schema_csv(
 - the two input files in `mixed_schema_csv_dir` have different *implied* schemas
 when parsed by pandas. this verifies that they end up with the same schema
 and can be combined into a single parquet file.
+- this additionally uses pathlib.Path for all path inputs.
 """
 args = ImportArguments(
 output_artifact_name="mixed_csv_bad",
 input_file_list=[
-os.path.join(mixed_schema_csv_dir, "input_01.csv"),
-os.path.join(mixed_schema_csv_dir, "input_02.csv"),
+Path(mixed_schema_csv_dir) / "input_01.csv",
+Path(mixed_schema_csv_dir) / "input_02.csv",
 ],
-output_path=tmp_path,
-dask_tmp=tmp_path,
+output_path=Path(tmp_path),
+dask_tmp=Path(tmp_path),
 highest_healpix_order=1,
-file_reader=get_file_reader("csv", chunksize=1, schema_file=mixed_schema_csv_parquet),
+file_reader=get_file_reader("csv", chunksize=1, schema_file=Path(mixed_schema_csv_parquet)),
 progress_bar=False,
 )
