Merge pull request #2176 from keltonhalbert/nc4-griddata
netCDF Front End for Cloud Model Simulations
Showing 11 changed files with 441 additions and 2 deletions.
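The practical effect of this change set is that yt can recognize and load CM1 (cm1_lofs) netCDF output directly. A minimal usage sketch of the new frontend; the output filename is hypothetical:

import yt

# Load a CM1 netCDF output file (filename is illustrative).
ds = yt.load("cm1out_000001.nc")

# List the on-disk fields discovered by the frontend.
print(ds.field_list)

# Slice perpendicular to the vertical axis; "dbz" is simulated
# reflectivity, one of the fields aliased in fields.py below.
slc = yt.SlicePlot(ds, "z", ("cm1", "dbz"))
slc.save()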
@@ -30,6 +30,7 @@
     "halo_catalog",
     "http_stream",
     "moab",
+    "nc4_cm1",
     "open_pmd",
     "owls",
     "owls_subfind",
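The hunk above adds "nc4_cm1" to what appears to be yt's registry of frontend package names. A sketch of how such a registry can drive lazy imports, so a frontend becomes discoverable simply by being listed (the container class here is an assumption, not necessarily yt's exact mechanism):

import importlib

_frontends = ("moab", "nc4_cm1", "open_pmd")  # abbreviated registry

class _frontend_container:
    def __init__(self):
        # Import yt.frontends.<name>.api for each registered name and
        # expose the module as an attribute of this container.
        for frontend in _frontends:
            _mod = f"yt.frontends.{frontend}.api"
            setattr(self, frontend, importlib.import_module(_mod))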
@@ -0,0 +1,6 @@
"""
API for yt.frontends.nc4_cm1
"""
@@ -0,0 +1,10 @@
"""
API for yt.frontends.nc4_cm1
"""

from .data_structures import CM1Dataset, CM1Grid, CM1Hierarchy
from .fields import CM1FieldInfo
from .io import CM1IOHandler
@@ -0,0 +1,215 @@
import os
import stat
import weakref
from collections import OrderedDict

import numpy as np

from yt.data_objects.index_subobjects.grid_patch import AMRGridPatch
from yt.data_objects.static_output import Dataset
from yt.geometry.grid_geometry_handler import GridIndex
from yt.utilities.file_handler import NetCDF4FileHandler, warn_netcdf
from yt.utilities.logger import ytLogger as mylog

from .fields import CM1FieldInfo


class CM1Grid(AMRGridPatch):
    _id_offset = 0

    def __init__(self, id, index, level, dimensions):
        super(CM1Grid, self).__init__(id, filename=index.index_filename, index=index)
        self.Parent = None
        self.Children = []
        self.Level = level
        self.ActiveDimensions = dimensions

    def __repr__(self):
        return f"CM1Grid_{self.id:d} ({self.ActiveDimensions})"


class CM1Hierarchy(GridIndex):
    grid = CM1Grid

    def __init__(self, ds, dataset_type="cm1"):
        self.dataset_type = dataset_type
        self.dataset = weakref.proxy(ds)
        # for now, the index file is the dataset!
        self.index_filename = self.dataset.parameter_filename
        self.directory = os.path.dirname(self.index_filename)
        # the float type for the simulation edges; must be float64 for now
        self.float_type = np.float64
        super(CM1Hierarchy, self).__init__(ds, dataset_type)

    def _detect_output_fields(self):
        # build the list of on-disk fields for dataset_type "cm1"
        vnames = self.dataset.parameters["variable_names"]
        self.field_list = [("cm1", vname) for vname in vnames]

    def _count_grids(self):
        # This needs to set self.num_grids
        self.num_grids = 1

    def _parse_index(self):
        self.grid_left_edge[0][:] = self.ds.domain_left_edge[:]
        self.grid_right_edge[0][:] = self.ds.domain_right_edge[:]
        self.grid_dimensions[0][:] = self.ds.domain_dimensions[:]
        self.grid_particle_count[0][0] = 0
        self.grid_levels[0][0] = 1
        self.max_level = 1

    def _populate_grid_objects(self):
        self.grids = np.empty(self.num_grids, dtype="object")
        for i in range(self.num_grids):
            g = self.grid(i, self, self.grid_levels.flat[i], self.grid_dimensions[i])
            g._prepare_grid()
            g._setup_dx()
            self.grids[i] = g
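Because _count_grids reports a single grid covering the whole domain, the index built by these methods is trivially flat. A quick interactive check of what they produce (filename hypothetical):

import yt

ds = yt.load("cm1out_000001.nc")
ds.index                        # triggers CM1Hierarchy construction
print(ds.index.num_grids)       # 1
g = ds.index.grids[0]
print(g)                        # CM1Grid_0 (<domain dimensions>)
print(g.LeftEdge, g.RightEdge)  # match the domain edges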

class CM1Dataset(Dataset):
    _index_class = CM1Hierarchy
    _field_info_class = CM1FieldInfo

    def __init__(
        self,
        filename,
        dataset_type="cm1",
        storage_filename=None,
        units_override=None,
        unit_system="mks",
    ):
        self.fluid_types += ("cm1",)
        self._handle = NetCDF4FileHandler(filename)
        # refinement factor between a grid and its subgrid
        self.refine_by = 1
        super(CM1Dataset, self).__init__(
            filename,
            dataset_type,
            units_override=units_override,
            unit_system=unit_system,
        )
        self.storage_filename = storage_filename
        self.filename = filename

    def _setup_coordinate_handler(self):
        # ensure the correct ordering of axes so plots aren't rotated
        # (z should always be on the vertical axis).
        super(CM1Dataset, self)._setup_coordinate_handler()
        self.coordinates._x_pairs = (("x", "y"), ("y", "x"), ("z", "x"))
        self.coordinates._y_pairs = (("x", "z"), ("y", "z"), ("z", "y"))
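The reordered _x_pairs/_y_pairs ensure that a slice along a horizontal axis draws z vertically instead of producing a rotated view. For example (field choice and filename are illustrative):

import yt

ds = yt.load("cm1out_000001.nc")
# With the pairs above, a slice along "y" plots x on the horizontal
# axis and z on the vertical axis.
slc = yt.SlicePlot(ds, "y", ("cm1", "thpert"))
slc.save()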

    def _set_code_unit_attributes(self):
        # This is where quantities are created that represent the various
        # on-disk units. These are the currently available quantities that
        # should be set, along with examples of how to set them to standard
        # values.
        with self._handle.open_ds() as _handle:
            length_unit = _handle.variables["xh"].units
        self.length_unit = self.quan(1.0, length_unit)
        self.mass_unit = self.quan(1.0, "kg")
        self.time_unit = self.quan(1.0, "s")
        self.velocity_unit = self.quan(1.0, "m/s")
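With these attributes set, code units resolve to the physical units read from the file (the xh variable's units attribute, typically meters for CM1). A small sketch of the resulting conversions, assuming a loaded dataset ds:

print(ds.length_unit)                           # e.g. 1.0 m, from xh.units
print(ds.quan(2500.0, "code_length").to("km"))
print(ds.quan(1.0, "code_velocity").to("m/s"))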

    def _parse_parameter_file(self):
        # This needs to set up the following items. Note that these are all
        # assumed to be in code units; domain_left_edge and domain_right_edge
        # will be converted to YTArray automatically at a later time.
        # This includes the cosmological parameters.
        #
        # self.unique_identifier <= unique identifier for the dataset
        #     being read (e.g., UUID or ST_CTIME)
        self.unique_identifier = int(os.stat(self.parameter_filename)[stat.ST_CTIME])
        self.parameters = {}  # code-specific items
        with self._handle.open_ds() as _handle:
            # _handle here is a netCDF Dataset object; we need to parse some
            # metadata for constructing our yt ds.

            # TODO: generalize this to be coordinate-variable-name agnostic so
            # it is useful for WRF or climate data. For now, we're hard coding
            # for CM1 specifically and have named the classes appropriately.
            # Additionally, we are only handling the cell-centered grid
            # ("xh", "yh", "zh") at present. The cell-centered grid contains
            # scalar fields and interpolated velocities.
            dims = [_handle.dimensions[i].size for i in ["xh", "yh", "zh"]]
            xh, yh, zh = [_handle.variables[i][:] for i in ["xh", "yh", "zh"]]
            self.domain_left_edge = np.array(
                [xh.min(), yh.min(), zh.min()], dtype="float64"
            )
            self.domain_right_edge = np.array(
                [xh.max(), yh.max(), zh.max()], dtype="float64"
            )

            # loop over the variable names in the netCDF file, recording only
            # those on the ("zh", "yh", "xh") grid.
            varnames = []
            for key, var in _handle.variables.items():
                if all(x in var.dimensions for x in ["time", "zh", "yh", "xh"]):
                    varnames.append(key)
            self.parameters["variable_names"] = varnames
            self.parameters["lofs_version"] = _handle.cm1_lofs_version
            self.parameters["is_uniform"] = _handle.uniform_mesh
            self.current_time = _handle.variables["time"][:][0]

            # record the dimension metadata: _handle.dimensions contains netCDF
            # objects, so we need to manually copy over attributes.
            dim_info = OrderedDict()
            for dim, meta in _handle.dimensions.items():
                dim_info[dim] = meta.size
            self.parameters["dimensions"] = dim_info

        self.dimensionality = 3
        self.domain_dimensions = np.array(dims, dtype="int64")
        self.periodicity = (False, False, False)

        # Set cosmological information to zero for non-cosmological.
        self.cosmological_simulation = 0
        self.current_redshift = 0.0
        self.omega_lambda = 0.0
        self.omega_matter = 0.0
        self.hubble_constant = 0.0
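Everything parsed here lands on the dataset object, so the netCDF metadata remains inspectable after loading. For instance, with a loaded dataset ds:

print(ds.domain_left_edge, ds.domain_right_edge)
print(ds.domain_dimensions)              # sizes of xh, yh, zh
print(ds.parameters["variable_names"])   # fields on the cell-centered grid
print(ds.parameters["dimensions"])       # every netCDF dimension and size
print(ds.current_time)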

    @classmethod
    def _is_valid(cls, filename, *args, **kwargs):
        # This accepts a filename or a set of arguments and returns True or
        # False depending on whether the file is of the type requested.

        warn_netcdf(filename)
        try:
            nc4_file = NetCDF4FileHandler(filename)
            with nc4_file.open_ds(keepweakref=True) as _handle:
                is_cm1_lofs = hasattr(_handle, "cm1_lofs_version")
                is_cm1 = hasattr(_handle, "cm1 version")  # not a typo, it is a space...

                # ensure the coordinates of each variable array exist in the dataset
                coords = _handle.dimensions  # the dataset-wide coordinates
                failed_vars = []  # list of failed variables
                for var in _handle.variables:  # iterate over the variables
                    vcoords = _handle[var].dimensions  # the dims for this variable
                    ncoords = len(vcoords)  # number of coordinates in the variable
                    # number of coordinates that pass for the variable
                    coordspassed = sum(vc in coords for vc in vcoords)
                    if coordspassed != ncoords:
                        failed_vars.append(var)

                if failed_vars:
                    mylog.warning(
                        "Trying to load a cm1_lofs netcdf file but the "
                        "coordinates of the following fields do not match the "
                        f"coordinates of the dataset: {failed_vars}"
                    )
                    return False

                if not is_cm1_lofs:
                    if is_cm1:
                        mylog.warning(
                            "It looks like you are trying to load a cm1 netcdf file, "
                            "but at present yt only supports cm1_lofs output. Until"
                            " support is added, you can likely use"
                            " yt.load_uniform_grid() to load your cm1 file manually."
                        )
                    return False
        except (OSError, AttributeError, ImportError):
            return False

        return True
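_is_valid is the hook yt.load uses to pick a frontend: each registered Dataset subclass is asked whether it recognizes the file, and a match selects the class. A simplified sketch of that dispatch, not yt's exact code:

def guess_dataset_class(filename, candidates):
    # candidates: iterable of Dataset subclasses, e.g. [CM1Dataset, ...]
    matches = [cls for cls in candidates if cls._is_valid(filename)]
    if not matches:
        raise RuntimeError(f"no frontend recognizes {filename}")
    return matches[0]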
@@ -0,0 +1,63 @@
from yt.fields.field_info_container import FieldInfoContainer

# We need to specify which fields we might have in our dataset. The field info
# container subclass here will define which fields it knows about. There are
# optionally methods on it that get called which can be subclassed.


class CM1FieldInfo(FieldInfoContainer):
    known_other_fields = (
        # Each entry here is of the form
        # ("name", ("units", ["fields", "to", "alias"], "display_name")),
        ("uinterp", ("m/s", ["velocity_x"], None)),
        ("vinterp", ("m/s", ["velocity_y"], None)),
        ("winterp", ("m/s", ["velocity_z"], None)),
        ("u", ("m/s", ["velocity_x"], None)),
        ("v", ("m/s", ["velocity_y"], None)),
        ("w", ("m/s", ["velocity_z"], None)),
        ("hwin_sr", ("m/s", ["storm_relative_horizontal_wind_speed"], None)),
        ("windmag_sr", ("m/s", ["storm_relative_3D_wind_speed"], None)),
        ("hwin_gr", ("m/s", ["ground_relative_horizontal_wind_speed"], None)),
        ("thpert", ("K", ["potential_temperature_perturbation"], None)),
        ("thrhopert", ("K", ["density_potential_temperature_perturbation"], None)),
        ("prespert", ("hPa", ["pressure_perturbation"], None)),
        ("rhopert", ("kg/m**3", ["density_perturbation"], None)),
        ("dbz", ("dB", ["simulated_reflectivity"], None)),
        ("qvpert", ("g/kg", ["water_vapor_mixing_ratio_perturbation"], None)),
        ("qc", ("g/kg", ["cloud_liquid_water_mixing_ratio"], None)),
        ("qr", ("g/kg", ["rain_mixing_ratio"], None)),
        ("qi", ("g/kg", ["cloud_ice_mixing_ratio"], None)),
        ("qs", ("g/kg", ["snow_mixing_ratio"], None)),
        ("qg", ("g/kg", ["graupel_or_hail_mixing_ratio"], None)),
        ("qcloud", ("g/kg", ["sum_of_cloud_water_and_cloud_ice_mixing_ratios"], None)),
        ("qprecip", ("g/kg", ["sum_of_rain_graupel_snow_mixing_ratios"], None)),
        ("nci", ("1/cm**3", ["number_concentration_of_cloud_ice"], None)),
        ("ncr", ("1/cm**3", ["number_concentration_of_rain"], None)),
        ("ncs", ("1/cm**3", ["number_concentration_of_snow"], None)),
        ("ncg", ("1/cm**3", ["number_concentration_of_graupel_or_hail"], None)),
        ("xvort", ("1/s", ["vorticity_x"], None)),
        ("yvort", ("1/s", ["vorticity_y"], None)),
        ("zvort", ("1/s", ["vorticity_z"], None)),
        ("hvort", ("1/s", ["horizontal_vorticity_magnitude"], None)),
        ("vortmag", ("1/s", ["vorticity_magnitude"], None)),
        ("streamvort", ("1/s", ["streamwise_vorticity"], None)),
        ("khh", ("m**2/s", ["khh"], None)),
        ("khv", ("m**2/s", ["khv"], None)),
        ("kmh", ("m**2/s", ["kmh"], None)),
        ("kmv", ("m**2/s", ["kmv"], None)),
    )

    known_particle_fields = (
        # Identical form to above
        # ("name", ("units", ["fields", "to", "alias"], "display_name")),
    )

    def setup_fluid_fields(self):
        # Here we do anything that might need info about the dataset.
        # You can use self.alias, self.add_output_field (for on-disk fields)
        # and self.add_field (for derived fields).
        pass

    def setup_particle_fields(self, ptype):
        super(CM1FieldInfo, self).setup_particle_fields(ptype)
        # This will get called for every particle type.
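Each entry in known_other_fields maps an on-disk variable name to its units and a descriptive alias, so data can be requested under either name. An illustrative access pattern, with a hypothetical filename:

import yt

ds = yt.load("cm1out_000001.nc")
ad = ds.all_data()

# The raw netCDF name and its alias return the same array.
dbz = ad["cm1", "dbz"]
refl = ad["cm1", "simulated_reflectivity"]

# Units come straight from the table: uinterp is tagged m/s.
print(ad["cm1", "velocity_x"].units)  # m/s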
@@ -0,0 +1,62 @@
import numpy as np

from yt.utilities.file_handler import NetCDF4FileHandler
from yt.utilities.io_handler import BaseIOHandler


class CM1IOHandler(BaseIOHandler):
    _particle_reader = False
    _dataset_type = "cm1"

    def __init__(self, ds):
        self.filename = ds.filename
        self._handle = NetCDF4FileHandler(self.filename)
        super(CM1IOHandler, self).__init__(ds)

    def _read_particle_coords(self, chunks, ptf):
        # This needs to *yield* a series of tuples of (ptype, (x, y, z)).
        # chunks is a list of chunks, and ptf is a dict where the keys are
        # ptypes and the values are lists of fields.
        raise NotImplementedError

    def _read_particle_fields(self, chunks, ptf, selector):
        # This gets called after the arrays have been allocated. It needs to
        # yield ((ptype, field), data) where data is the masked result of
        # reading ptype, field and applying the selector to the data read in.
        # Selector objects have a .select_points(x, y, z) that returns a mask,
        # so you need to do your masking here.
        raise NotImplementedError

    def _read_fluid_selection(self, chunks, selector, fields, size):
        # This needs to allocate a set of arrays inside a dictionary, where the
        # keys are the (ftype, fname) tuples and the values are arrays that
        # have been masked using whatever selector method is appropriate. The
        # dict gets returned at the end and it should be flat, with selected
        # data. Note that if you're reading grid data, you might need to
        # special-case a grid selector object.
        # Also note that "chunks" is a generator for multiple chunks, each of
        # which contains a list of grids. The returned numpy arrays should be
        # in 64-bit float and contiguous along the z direction. Therefore, for
        # a C-like input array with the dimensions [x][y][z] or a Fortran-like
        # input array with the dimensions (z, y, x), a matrix transpose is
        # required (e.g., using np_array.transpose() or np_array.swapaxes(0, 2)).

        data = {}
        with self._handle.open_ds() as ds:
            # "chunks" is a generator; materialize it so it can be traversed
            # once per field, and reset the write offset for each field.
            chunk_list = list(chunks)
            for field in fields:
                data[field] = np.empty(size, dtype="float64")
                offset = 0
                for chunk in chunk_list:
                    for grid in chunk.objs:
                        # drop the leading time axis, then transpose
                        # (z, y, x) -> (x, y, z) as described above
                        variable = ds.variables[field[1]][:][0]
                        values = np.squeeze(variable.T)
                        offset += grid.select(selector, values, data[field], offset)
        return data
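The transpose above converts the netCDF (time, zh, yh, xh) storage order into the (x, y, z) ordering yt expects. A standalone numpy check of that shape manipulation:

import numpy as np

# Mimic one netCDF variable stored as (time, zh, yh, xh).
nz, ny, nx = 4, 5, 6
var = np.arange(nz * ny * nx, dtype="float64").reshape(1, nz, ny, nx)

variable = var[:][0]             # drop the time axis -> (nz, ny, nx)
values = np.squeeze(variable.T)  # transpose -> (nx, ny, nz)
print(values.shape)              # (6, 5, 4)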

    def _read_chunk_data(self, chunk, fields):
        # This reads the data from a single chunk without doing any selection,
        # and is only used for caching data that might be used by multiple
        # different selectors later. For instance, this can speed up ghost zone
        # computation.
        pass
Empty file.