Skip to content

Commit

Permalink
Fix cache check time and drop_variables (#144)
Browse files Browse the repository at this point in the history
* fix cache check time and drop_variables

* fix test open_dataset zarr

* change salient cron schedule
  • Loading branch information
danangmassandy authored Sep 6, 2024
1 parent 2c1be89 commit dd77bca
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 9 deletions.
4 changes: 2 additions & 2 deletions django_project/core/celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
},
'salient-collector-session': {
'task': 'salient_collector_session',
# Run every day at 02:00 UTC (05:00 East Africa Time)
'schedule': crontab(minute='0', hour='2'),
# Run every Monday at 02:00 UTC
'schedule': crontab(minute='0', hour='2', day_of_week='1'),
},
}

Expand Down
9 changes: 6 additions & 3 deletions django_project/gap/tests/utils/test_zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ def test_open_dataset(
mock_open_zarr.return_value = mock_dataset

source_file = DataSourceFile(name='test_dataset.zarr')
source_file.metadata = {
'drop_variables': ['test']
}
self.reader.setup_reader()
result = self.reader.open_dataset(source_file)

Expand All @@ -138,15 +141,15 @@ def test_open_dataset(
target_protocol='s3',
target_options=self.reader.s3_options,
cache_storage=f'/tmp/{cache_filename}',
cache_check=10,
expiry_time=3600,
cache_check=3600,
expiry_time=86400,
target_kwargs={'s3': mock_s3fs_instance}
)
mock_fs_instance.get_mapper.assert_called_once_with(
's3://test-bucket/test-prefix/test_dataset.zarr')
mock_open_zarr.assert_called_once_with(
mock_fs_instance.get_mapper.return_value,
consolidated=True)
consolidated=True, drop_variables=['test'])

@patch('gap.utils.zarr.BaseZarrReader.get_s3_variables')
@patch('gap.utils.zarr.BaseZarrReader.get_s3_client_kwargs')
Expand Down
14 changes: 10 additions & 4 deletions django_project/gap/utils/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,15 +150,21 @@ def open_dataset(self, source_file: DataSourceFile) -> xrDataset:
target_protocol='s3',
target_options=self.s3_options,
cache_storage=self.get_zarr_cache_dir(source_file.name),
cache_check=10,
expiry_time=3600,
cache_check=3600,
expiry_time=86400,
target_kwargs={
's3': s3_fs
}
)

# create fsspec mapper file list
s3_mapper = fs.get_mapper(zarr_url)

drop_variables = []
if source_file.metadata:
drop_variables = source_file.metadata.get(
'drop_variables', [])
# open zarr, use consolidated to read the metadata
return xr.open_zarr(s3_mapper, consolidated=True)
ds = xr.open_zarr(
s3_mapper, consolidated=True, drop_variables=drop_variables)

return ds

0 comments on commit dd77bca

Please sign in to comment.