diff --git a/tests/tests.yaml b/tests/tests.yaml
index 35e5720a633..a33bb38eee7 100644
--- a/tests/tests.yaml
+++ b/tests/tests.yaml
@@ -167,6 +167,9 @@ answer_tests:
   #local_particle_trajectory_001:
   #  - yt/data_objects/tests/test_particle_trajectories.py
+
+  local_nc4_cm1_000: # PR 2176
+    - yt/frontends/nc4_cm1/tests/test_outputs.py:test_cm1_mesh_fields
 
 other_tests:
   unittests:
diff --git a/yt/frontends/api.py b/yt/frontends/api.py
index 982d664742a..7f163fe7491 100644
--- a/yt/frontends/api.py
+++ b/yt/frontends/api.py
@@ -30,6 +30,7 @@
     "halo_catalog",
     "http_stream",
     "moab",
+    "nc4_cm1",
     "open_pmd",
     "owls",
     "owls_subfind",
diff --git a/yt/frontends/nc4_cm1/__init__.py b/yt/frontends/nc4_cm1/__init__.py
new file mode 100644
index 00000000000..7a4dbb7a31b
--- /dev/null
+++ b/yt/frontends/nc4_cm1/__init__.py
@@ -0,0 +1,6 @@
+"""
+API for yt.frontends.nc4_cm1
+
+
+
+"""
diff --git a/yt/frontends/nc4_cm1/api.py b/yt/frontends/nc4_cm1/api.py
new file mode 100644
index 00000000000..f7acb342075
--- /dev/null
+++ b/yt/frontends/nc4_cm1/api.py
@@ -0,0 +1,10 @@
+"""
+API for yt.frontends.nc4_cm1
+
+
+
+"""
+
+from .data_structures import CM1Dataset, CM1Grid, CM1Hierarchy
+from .fields import CM1FieldInfo
+from .io import CM1IOHandler
diff --git a/yt/frontends/nc4_cm1/data_structures.py b/yt/frontends/nc4_cm1/data_structures.py
new file mode 100644
index 00000000000..af800b5b313
--- /dev/null
+++ b/yt/frontends/nc4_cm1/data_structures.py
@@ -0,0 +1,215 @@
+import os
+import stat
+import weakref
+from collections import OrderedDict
+
+import numpy as np
+
+from yt.data_objects.index_subobjects.grid_patch import AMRGridPatch
+from yt.data_objects.static_output import Dataset
+from yt.geometry.grid_geometry_handler import GridIndex
+from yt.utilities.file_handler import NetCDF4FileHandler, warn_netcdf
+from yt.utilities.logger import ytLogger as mylog
+
+from .fields import CM1FieldInfo
+
+
+class CM1Grid(AMRGridPatch):
+    _id_offset = 0
+
+    def __init__(self, id, index, level, dimensions):
+        super(CM1Grid, self).__init__(id, filename=index.index_filename, index=index)
+        self.Parent = None
+        self.Children = []
+        self.Level = level
+        self.ActiveDimensions = dimensions
+
+    def __repr__(self):
+        return f"CM1Grid_{self.id:d} ({self.ActiveDimensions})"
+
+
+class CM1Hierarchy(GridIndex):
+    grid = CM1Grid
+
+    def __init__(self, ds, dataset_type="cm1"):
+        self.dataset_type = dataset_type
+        self.dataset = weakref.proxy(ds)
+        # for now, the index file is the dataset!
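+        # (each CM1Grid above is constructed with filename=index.index_filename,
+        # so every grid in this index reads from that same single netCDF file)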
+        self.index_filename = self.dataset.parameter_filename
+        self.directory = os.path.dirname(self.index_filename)
+        # float type for the simulation edges; must be float64 now
+        self.float_type = np.float64
+        super(CM1Hierarchy, self).__init__(ds, dataset_type)
+
+    def _detect_output_fields(self):
+        # build list of on-disk fields for dataset_type 'cm1'
+        vnames = self.dataset.parameters["variable_names"]
+        self.field_list = [("cm1", vname) for vname in vnames]
+
+    def _count_grids(self):
+        # This needs to set self.num_grids
+        self.num_grids = 1
+
+    def _parse_index(self):
+        self.grid_left_edge[0][:] = self.ds.domain_left_edge[:]
+        self.grid_right_edge[0][:] = self.ds.domain_right_edge[:]
+        self.grid_dimensions[0][:] = self.ds.domain_dimensions[:]
+        self.grid_particle_count[0][0] = 0
+        self.grid_levels[0][0] = 1
+        self.max_level = 1
+
+    def _populate_grid_objects(self):
+        self.grids = np.empty(self.num_grids, dtype="object")
+        for i in range(self.num_grids):
+            g = self.grid(i, self, self.grid_levels.flat[i], self.grid_dimensions[i])
+            g._prepare_grid()
+            g._setup_dx()
+            self.grids[i] = g
+
+
+class CM1Dataset(Dataset):
+    _index_class = CM1Hierarchy
+    _field_info_class = CM1FieldInfo
+
+    def __init__(
+        self,
+        filename,
+        dataset_type="cm1",
+        storage_filename=None,
+        units_override=None,
+        unit_system="mks",
+    ):
+        self.fluid_types += ("cm1",)
+        self._handle = NetCDF4FileHandler(filename)
+        # refinement factor between a grid and its subgrid
+        self.refine_by = 1
+        super(CM1Dataset, self).__init__(
+            filename,
+            dataset_type,
+            units_override=units_override,
+            unit_system=unit_system,
+        )
+        self.storage_filename = storage_filename
+        self.filename = filename
+
+    def _setup_coordinate_handler(self):
+        # ensure correct ordering of axes so plots aren't rotated (z should always be
+        # on the vertical axis).
+        super(CM1Dataset, self)._setup_coordinate_handler()
+        self.coordinates._x_pairs = (("x", "y"), ("y", "x"), ("z", "x"))
+        self.coordinates._y_pairs = (("x", "z"), ("y", "z"), ("z", "y"))
+
+    def _set_code_unit_attributes(self):
+        # This is where quantities are created that represent the various
+        # on-disk units. These are the currently available quantities that
+        # should be set, along with examples of how to set them to standard
+        # values.
+        with self._handle.open_ds() as _handle:
+            length_unit = _handle.variables["xh"].units
+        self.length_unit = self.quan(1.0, length_unit)
+        self.mass_unit = self.quan(1.0, "kg")
+        self.time_unit = self.quan(1.0, "s")
+        self.velocity_unit = self.quan(1.0, "m/s")
+
+    def _parse_parameter_file(self):
+        # This needs to set up the following items. Note that these are all
+        # assumed to be in code units; domain_left_edge and domain_right_edge
+        # will be converted to YTArray automatically at a later time.
+        # This includes the cosmological parameters.
+        #
+        # self.unique_identifier <= unique identifier for the dataset
+        #                           being read (e.g., UUID or ST_CTIME)
+        self.unique_identifier = int(os.stat(self.parameter_filename)[stat.ST_CTIME])
+        self.parameters = {}  # code-specific items
+        with self._handle.open_ds() as _handle:
+            # _handle here is a netCDF Dataset object; we need to parse some
+            # metadata for constructing our yt ds.
+
+            # TO DO: generalize this to be coordinate variable name agnostic in
+            # order to make it useful for WRF or climate data. For now, we're
+            # hard coding for CM1 specifically and have named the classes
+            # appropriately. Additionally,
+            # we are only handling the cell-centered grid ("xh", "yh", "zh") at
+            # present. The cell-centered grid contains scalar fields and
+            # interpolated velocities.
+            dims = [_handle.dimensions[i].size for i in ["xh", "yh", "zh"]]
+            xh, yh, zh = [_handle.variables[i][:] for i in ["xh", "yh", "zh"]]
+            self.domain_left_edge = np.array(
+                [xh.min(), yh.min(), zh.min()], dtype="float64"
+            )
+            self.domain_right_edge = np.array(
+                [xh.max(), yh.max(), zh.max()], dtype="float64"
+            )
+
+            # loop over the variable names in the netCDF file, record only those on the
+            # "zh","yh","xh" grid.
+            varnames = []
+            for key, var in _handle.variables.items():
+                if all(x in var.dimensions for x in ["time", "zh", "yh", "xh"]):
+                    varnames.append(key)
+            self.parameters["variable_names"] = varnames
+            self.parameters["lofs_version"] = _handle.cm1_lofs_version
+            self.parameters["is_uniform"] = _handle.uniform_mesh
+            self.current_time = _handle.variables["time"][:][0]
+
+            # record the dimension metadata: _handle.dimensions contains netCDF
+            # objects, so we need to manually copy over attributes.
+            dim_info = OrderedDict()
+            for dim, meta in _handle.dimensions.items():
+                dim_info[dim] = meta.size
+            self.parameters["dimensions"] = dim_info
+
+        self.dimensionality = 3
+        self.domain_dimensions = np.array(dims, dtype="int64")
+        self.periodicity = (False, False, False)
+
+        # Set cosmological information to zero for a non-cosmological dataset.
+        self.cosmological_simulation = 0
+        self.current_redshift = 0.0
+        self.omega_lambda = 0.0
+        self.omega_matter = 0.0
+        self.hubble_constant = 0.0
+
+    @classmethod
+    def _is_valid(cls, filename, *args, **kwargs):
+        # This accepts a filename or a set of arguments and returns True or
+        # False depending on whether the file is of the type requested.
+
+        warn_netcdf(filename)
+        try:
+            nc4_file = NetCDF4FileHandler(filename)
+            with nc4_file.open_ds(keepweakref=True) as _handle:
+                is_cm1_lofs = hasattr(_handle, "cm1_lofs_version")
+                is_cm1 = hasattr(_handle, "cm1 version")  # not a typo, it is a space...
+
+                # ensure the coordinates of each variable array exist in the dataset
+                coords = _handle.dimensions  # get the dataset-wide coordinates
+                failed_vars = []  # list of failed variables
+                for var in _handle.variables:  # iterate over the variables
+                    vcoords = _handle[var].dimensions  # get the dims for the variable
+                    ncoords = len(vcoords)  # number of coordinates in variable
+                    # number of coordinates that pass for a variable
+                    coordspassed = sum(vc in coords for vc in vcoords)
+                    if coordspassed != ncoords:
+                        failed_vars.append(var)
+
+                if failed_vars:
+                    mylog.warning(
+                        "Trying to load a cm1_lofs netcdf file but the "
+                        "coordinates of the following fields do not match the "
+                        f"coordinates of the dataset: {failed_vars}"
+                    )
+                    return False
+
+                if not is_cm1_lofs:
+                    if is_cm1:
+                        mylog.warning(
+                            "It looks like you are trying to load a cm1 netcdf file, "
+                            "but at present yt only supports cm1_lofs output. Until"
+                            " support is added, you can likely use"
+                            " yt.load_uniform_grid() to load your cm1 file manually."
+                        )
+                    return False
+        except (OSError, AttributeError, ImportError):
+            return False
+
+        return True
diff --git a/yt/frontends/nc4_cm1/fields.py b/yt/frontends/nc4_cm1/fields.py
new file mode 100644
index 00000000000..6b9c299f193
--- /dev/null
+++ b/yt/frontends/nc4_cm1/fields.py
@@ -0,0 +1,63 @@
+from yt.fields.field_info_container import FieldInfoContainer
+
+# We need to specify which fields we might have in our dataset. The field info
+# container subclass here will define which fields it knows about. There are
+# optional methods on it that get called and can be overridden in subclasses.
+
+
+class CM1FieldInfo(FieldInfoContainer):
+    known_other_fields = (
+        # Each entry here is of the form
+        # ( "name", ("units", ["fields", "to", "alias"], # "display_name")),
+        ("uinterp", ("m/s", ["velocity_x"], None)),
+        ("vinterp", ("m/s", ["velocity_y"], None)),
+        ("winterp", ("m/s", ["velocity_z"], None)),
+        ("u", ("m/s", ["velocity_x"], None)),
+        ("v", ("m/s", ["velocity_y"], None)),
+        ("w", ("m/s", ["velocity_z"], None)),
+        ("hwin_sr", ("m/s", ["storm_relative_horizontal_wind_speed"], None)),
+        ("windmag_sr", ("m/s", ["storm_relative_3D_wind_speed"], None)),
+        ("hwin_gr", ("m/s", ["ground_relative_horizontal_wind_speed"], None)),
+        ("thpert", ("K", ["potential_temperature_perturbation"], None)),
+        ("thrhopert", ("K", ["density_potential_temperature_perturbation"], None)),
+        ("prespert", ("hPa", ["pressure_perturbation"], None)),
+        ("rhopert", ("kg/m**3", ["density_perturbation"], None)),
+        ("dbz", ("dB", ["simulated_reflectivity"], None)),
+        ("qvpert", ("g/kg", ["water_vapor_mixing_ratio_perturbation"], None)),
+        ("qc", ("g/kg", ["cloud_liquid_water_mixing_ratio"], None)),
+        ("qr", ("g/kg", ["rain_mixing_ratio"], None)),
+        ("qi", ("g/kg", ["cloud_ice_mixing_ratio"], None)),
+        ("qs", ("g/kg", ["snow_mixing_ratio"], None)),
+        ("qg", ("g/kg", ["graupel_or_hail_mixing_ratio"], None)),
+        ("qcloud", ("g/kg", ["sum_of_cloud_water_and_cloud_ice_mixing_ratios"], None)),
+        ("qprecip", ("g/kg", ["sum_of_rain_graupel_snow_mixing_ratios"], None)),
+        ("nci", ("1/cm**3", ["number_concentration_of_cloud_ice"], None)),
+        ("ncr", ("1/cm**3", ["number_concentration_of_rain"], None)),
+        ("ncs", ("1/cm**3", ["number_concentration_of_snow"], None)),
+        ("ncg", ("1/cm**3", ["number_concentration_of_graupel_or_hail"], None)),
+        ("xvort", ("1/s", ["vorticity_x"], None)),
+        ("yvort", ("1/s", ["vorticity_y"], None)),
+        ("zvort", ("1/s", ["vorticity_z"], None)),
+        ("hvort", ("1/s", ["horizontal_vorticity_magnitude"], None)),
+        ("vortmag", ("1/s", ["vorticity_magnitude"], None)),
+        ("streamvort", ("1/s", ["streamwise_vorticity"], None)),
+        ("khh", ("m**2/s", ["khh"], None)),
+        ("khv", ("m**2/s", ["khv"], None)),
+        ("kmh", ("m**2/s", ["kmh"], None)),
+        ("kmv", ("m**2/s", ["kmv"], None)),
+    )
+
+    known_particle_fields = (
+        # Identical form to above
+        # ( "name", ("units", ["fields", "to", "alias"], # "display_name")),
+    )
+
+    def setup_fluid_fields(self):
+        # Here we do anything that might need info about the dataset.
+        # You can use self.alias, self.add_output_field (for on-disk fields)
+        # and self.add_field (for derived fields).
+        pass
+
+    def setup_particle_fields(self, ptype):
+        super(CM1FieldInfo, self).setup_particle_fields(ptype)
+        # This will get called for every particle type.
diff --git a/yt/frontends/nc4_cm1/io.py b/yt/frontends/nc4_cm1/io.py
new file mode 100644
index 00000000000..fb1a8ff673e
--- /dev/null
+++ b/yt/frontends/nc4_cm1/io.py
@@ -0,0 +1,62 @@
+import numpy as np
+
+from yt.utilities.file_handler import NetCDF4FileHandler
+from yt.utilities.io_handler import BaseIOHandler
+
+
+class CM1IOHandler(BaseIOHandler):
+    _particle_reader = False
+    _dataset_type = "cm1"
+
+    def __init__(self, ds):
+        self.filename = ds.filename
+        self._handle = NetCDF4FileHandler(self.filename)
+        super(CM1IOHandler, self).__init__(ds)
+
+    def _read_particle_coords(self, chunks, ptf):
+        # This needs to *yield* a series of tuples of (ptype, (x, y, z)).
+        # chunks is a list of chunks, and ptf is a dict where the keys are
+        # ptypes and the values are lists of fields.
+        raise NotImplementedError
+
+    def _read_particle_fields(self, chunks, ptf, selector):
+        # This gets called after the arrays have been allocated. It needs to
+        # yield ((ptype, field), data) where data is the masked results of
+        # reading ptype, field and applying the selector to the data read in.
+        # Selector objects have a .select_points(x, y, z) method that returns a
+        # mask, so you need to do your masking here.
+        raise NotImplementedError
+
+    def _read_fluid_selection(self, chunks, selector, fields, size):
+        # This needs to allocate a set of arrays inside a dictionary, where the
+        # keys are the (ftype, fname) tuples and the values are arrays that
+        # have been masked using whatever selector method is appropriate. The
+        # dict gets returned at the end and it should be flat, with selected
+        # data. Note that if you're reading grid data, you might need to
+        # special-case a grid selector object.
+        # Also note that "chunks" is a generator for multiple chunks, each of
+        # which contains a list of grids. The returned numpy arrays should be
+        # in 64-bit float and contiguous along the z direction. Therefore, for
+        # a C-like input array with the dimension [x][y][z] or a
+        # Fortran-like input array with the dimension (z, y, x), a matrix
+        # transpose is required (e.g., using np_array.transpose() or
+        # np_array.swapaxes(0, 2)).
+
+        data = {}
+        # "chunks" is a generator, so materialize it up front; otherwise it
+        # would be exhausted after the first field's pass below.
+        chunks = list(chunks)
+        with self._handle.open_ds() as ds:
+            for field in fields:
+                data[field] = np.empty(size, dtype="float64")
+                # grid.select fills data[field] starting at offset, so the
+                # offset must restart at zero for each field.
+                offset = 0
+                for chunk in chunks:
+                    for grid in chunk.objs:
+                        variable = ds.variables[field[1]][:][0]
+                        values = np.squeeze(variable.T)
+                        offset += grid.select(selector, values, data[field], offset)
+        return data
+
+    def _read_chunk_data(self, chunk, fields):
+        # This reads the data from a single chunk without doing any selection,
+        # and is only used for caching data that might be used by multiple
+        # different selectors later. For instance, this can speed up ghost zone
+        # computation.
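+        # A possible caching sketch (an untested illustration, not part of the
+        # implemented frontend): read each requested field for this chunk in
+        # full, keyed by (ftype, fname), mirroring the read pattern used in
+        # _read_fluid_selection above:
+        #
+        #     rv = {}
+        #     with self._handle.open_ds() as ds:
+        #         for ftype, fname in fields:
+        #             rv[(ftype, fname)] = np.squeeze(ds.variables[fname][:][0].T)
+        #     return rv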
+        pass
diff --git a/yt/frontends/nc4_cm1/tests/__init__.py b/yt/frontends/nc4_cm1/tests/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/yt/frontends/nc4_cm1/tests/test_outputs.py b/yt/frontends/nc4_cm1/tests/test_outputs.py
new file mode 100644
index 00000000000..fff9f8a7698
--- /dev/null
+++ b/yt/frontends/nc4_cm1/tests/test_outputs.py
@@ -0,0 +1,69 @@
+from yt.frontends.nc4_cm1.api import CM1Dataset
+from yt.testing import assert_equal, requires_file, units_override_check
+from yt.utilities.answer_testing.framework import (
+    FieldValuesTest,
+    GridValuesTest,
+    can_run_ds,
+    data_dir_load,
+    requires_ds,
+    small_patch_amr,
+)
+
+_fields = ("thrhopert", "zvort")
+cm1sim = "cm1_tornado_lofs/nc4_cm1_lofs_tornado_test.nc"
+
+
+@requires_ds(cm1sim)
+def test_cm1_mesh_fields():
+    ds = data_dir_load(cm1sim)
+    assert_equal(str(ds), "nc4_cm1_lofs_tornado_test.nc")
+
+    # run the small_patch_amr tests on safe fields
+    ic = ds.domain_center
+    for test in small_patch_amr(ds, _fields, input_center=ic, input_weight=None):
+        test_cm1_mesh_fields.__name__ = test.description
+        yield test
+
+    # manually run the Grid and Field Values tests on dbz (do not want to run the
+    # ProjectionValuesTest for this field)
+    if can_run_ds(ds):
+        dso = [None, ("sphere", (ic, (0.1, "unitary")))]
+        for field in ["dbz"]:
+            yield GridValuesTest(ds, field)
+            for dobj_name in dso:
+                yield FieldValuesTest(ds, field, dobj_name)
+
+
+@requires_file(cm1sim)
+def test_CM1Dataset():
+    assert isinstance(data_dir_load(cm1sim), CM1Dataset)
+
+
+@requires_file(cm1sim)
+def test_units_override():
+    units_override_check(cm1sim)
+
+
+@requires_file(cm1sim)
+def test_dims_and_meta():
+    ds = data_dir_load(cm1sim)
+
+    known_dims = ["time", "zf", "zh", "yf", "yh", "xf", "xh"]
+    dims = ds.parameters["dimensions"]
+
+    ## check the file for 2 grids and a time dimension -
+    ## (time, xf, xh, yf, yh, zf, zh). The dimensions ending in
+    ## f are the staggered velocity grid components and the
+    ## dimensions ending in h are the scalar grid components
+    assert_equal(len(dims), len(known_dims))
+    for kdim in known_dims:
+        assert kdim in dims
+
+    ## check the simulation time
+    assert_equal(ds.current_time, 5500.0)
+
+
+@requires_file(cm1sim)
+def test_if_cm1():
+    ds = data_dir_load(cm1sim)
+    assert float(ds.parameters["lofs_version"]) >= 1.0
diff --git a/yt/utilities/file_handler.py b/yt/utilities/file_handler.py
index 58eb99bc296..4ee2fa880b2 100644
--- a/yt/utilities/file_handler.py
+++ b/yt/utilities/file_handler.py
@@ -108,9 +108,9 @@ def __init__(self, filename):
         self.filename = filename
 
     @contextmanager
-    def open_ds(self):
+    def open_ds(self, **kwargs):
         from yt.utilities.on_demand_imports import _netCDF4 as netCDF4
 
-        ds = netCDF4.Dataset(self.filename)
+        ds = netCDF4.Dataset(self.filename, mode="r", **kwargs)
         yield ds
         ds.close()
diff --git a/yt/utilities/on_demand_imports.py b/yt/utilities/on_demand_imports.py
index 059b8be78a9..ea68f879e7f 100644
--- a/yt/utilities/on_demand_imports.py
+++ b/yt/utilities/on_demand_imports.py
@@ -56,6 +56,16 @@ class netCDF4_imports:
     _name = "netCDF4"
     _Dataset = None
 
+    def __init__(self):
+        # this ensures the import ordering between netCDF4 and h5py. If h5py is
+        # imported first, file lock errors can occur on some systems (including
+        # travis-ci), so we need to do this before initializing h5py_imports()!
+        # similar to this issue: https://github.com/pydata/xarray/issues/2560
+        try:
+            import netCDF4  # noqa F401
+        except ImportError:
+            pass
+
     @property
     def Dataset(self):
         if self._Dataset is None:
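
A minimal usage sketch for the new frontend (illustration only; it assumes the
answer-test dataset referenced above is available locally):

    import yt

    # CM1Dataset._is_valid() checks for the cm1_lofs_version attribute, so
    # yt.load() routes the file to this frontend automatically.
    ds = yt.load("cm1_tornado_lofs/nc4_cm1_lofs_tornado_test.nc")

    # on-disk fields are registered under the "cm1" fluid type
    slc = yt.SlicePlot(ds, "z", ("cm1", "thrhopert"))
    slc.save()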