Skip to content

Commit

Permalink
Merge pull request #408 from davidhassell/dask-get-filenames-2
Browse files: browse the repository at this point in the history
dask: `Dask.get_filenames`
  • Loading branch information
sadielbartholomew authored Jun 14, 2022
2 parents 719fe86 + bc2bbce commit 032eabd
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 46 deletions.
43 changes: 1 addition & 42 deletions cf/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
_DEPRECATION_ERROR_KWARGS,
_numpy_isclose,
_section,
abspath,
atol,
default_netCDF_fillvals,
free_memory,
Expand All @@ -39,7 +38,6 @@
)
from ..mixin_container import Container
from ..units import Units
from . import FileArray
from .collapse import Collapse
from .creation import compressed_to_dask, generate_axis_identifiers, to_dask
from .dask_utils import (
Expand Down Expand Up @@ -135,7 +133,7 @@ def wrapper(*args, **kwargs):
_DEFAULT_HARDMASK = True


class Data(Container, cfdm.Data, DataClassDeprecationsMixin):
class Data(DataClassDeprecationsMixin, Container, cfdm.Data):
"""An N-dimensional data array with units and masked values.
* Contains an N-dimensional, indexable and broadcastable array with
Expand Down Expand Up @@ -7734,45 +7732,6 @@ def insert_dimension(self, position=0, inplace=False):

return d

@daskified(_DASKIFIED_VERBOSE)
def get_filenames(self):
    """Return the names of the files that hold parts of the data.

    The dask graph of the data is inspected: every key that has no
    dependencies is treated as raw data, and each such entry that
    is a `FileArray` contributes its file name.

    :Returns:

        `set`
            The file names, each passed through `abspath`. If no
            part of the data is backed by a `FileArray` (e.g. the
            data is in memory) then an empty `set` is returned.

    **Examples**

    >>> f = cf.NetCDFArray(TODODASK)
    >>> d = cf.Data(f)
    >>> d.get_filenames()
    {TODODASK}

    >>> d = cf.Data([1, 2, 3])
    >>> d.get_filenames()
    set()

    """
    graph = self.to_dask_array().dask
    tasks = graph.to_dict()

    # A key with an empty dependency set is a leaf of the graph,
    # i.e. raw data; only file-backed leaves have a file name.
    filenames = {
        abspath(tasks[key].get_filename())
        for key, dependencies in graph.get_all_dependencies().items()
        if not dependencies and isinstance(tasks[key], FileArray)
    }

    # Drop the placeholder left by any entry without a file name
    filenames.discard(None)
    return filenames

@daskified(_DASKIFIED_VERBOSE)
@_deprecated_kwarg_check("size")
@_inplace_enabled(default=False)
Expand Down
17 changes: 17 additions & 0 deletions cf/data/mixin/deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,23 @@ def close(self):
removed_at="5.0.0",
) # pragma: no cover

def get_filenames(self):
    """Return the names of files containing parts of the data array.

    Deprecated at version TODODASK. Calling this method always
    raises a `DeprecationError`; nothing is ever returned.

    :Returns:

        `set`
            Formerly, the file names in normalized, absolute
            form, or an empty `set` if the data was in memory.

    """
    message = (
        "Data method 'get_filenames' has been deprecated at "
        "version TODODASK and is not available."
    )
    raise DeprecationError(message)  # pragma: no cover

def chunk(self, chunksize=None, total=None, omit_axes=None, pmshape=None):
"""Partition the data array.
Expand Down
4 changes: 0 additions & 4 deletions cf/test/test_Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3810,10 +3810,6 @@ def test_Data_flat(self):
list(d.flat(ignore_masked=False)), [1, np.ma.masked, 3, 4]
)

@unittest.skipIf(TEST_DASKIFIED_ONLY, "Needs updated NetCDFArray to test")
def test_Data_get_filenames(self):
    """Placeholder test for `Data.get_filenames`.

    The body is intentionally empty, and the test is skipped
    whenever `TEST_DASKIFIED_ONLY` is true.

    """
    pass

def test_Data_tolist(self):
for x in (1, [1, 2], [[1, 2], [3, 4]]):
d = cf.Data(x)
Expand Down

0 comments on commit 032eabd

Please sign in to comment.