Commit 65af8de

Merge pull request #2176 from keltonhalbert/nc4-griddata

netCDF Front End for Cloud Model Simulations

munkm authored Oct 7, 2020
2 parents 324d3bb + 12166da

Showing 11 changed files with 441 additions and 2 deletions.
3 changes: 3 additions & 0 deletions tests/tests.yaml
@@ -167,6 +167,9 @@ answer_tests:

#local_particle_trajectory_001:
# - yt/data_objects/tests/test_particle_trajectories.py

local_nc4_cm1_000: # PR 2176
- yt/frontends/nc4_cm1/tests/test_outputs.py:test_cm1_mesh_fields

other_tests:
unittests:
1 change: 1 addition & 0 deletions yt/frontends/api.py
@@ -30,6 +30,7 @@
"halo_catalog",
"http_stream",
"moab",
"nc4_cm1",
"open_pmd",
"owls",
"owls_subfind",
6 changes: 6 additions & 0 deletions yt/frontends/nc4_cm1/__init__.py
@@ -0,0 +1,6 @@
"""
API for yt.frontends.nc4_cm1
"""
10 changes: 10 additions & 0 deletions yt/frontends/nc4_cm1/api.py
@@ -0,0 +1,10 @@
"""
API for yt.frontends.nc4_cm1
"""

from .data_structures import CM1Dataset, CM1Grid, CM1Hierarchy
from .fields import CM1FieldInfo
from .io import CM1IOHandler
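
The api module simply re-exports the frontend's public names, so downstream code can import them directly, e.g.:

from yt.frontends.nc4_cm1.api import CM1Dataset, CM1FieldInfo, CM1IOHandler
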
215 changes: 215 additions & 0 deletions yt/frontends/nc4_cm1/data_structures.py
@@ -0,0 +1,215 @@
import os
import stat
import weakref
from collections import OrderedDict

import numpy as np

from yt.data_objects.index_subobjects.grid_patch import AMRGridPatch
from yt.data_objects.static_output import Dataset
from yt.geometry.grid_geometry_handler import GridIndex
from yt.utilities.file_handler import NetCDF4FileHandler, warn_netcdf
from yt.utilities.logger import ytLogger as mylog

from .fields import CM1FieldInfo


class CM1Grid(AMRGridPatch):
_id_offset = 0

def __init__(self, id, index, level, dimensions):
super(CM1Grid, self).__init__(id, filename=index.index_filename, index=index)
self.Parent = None
self.Children = []
self.Level = level
self.ActiveDimensions = dimensions

def __repr__(self):
return f"CM1Grid_{self.id:d} ({self.ActiveDimensions})"


class CM1Hierarchy(GridIndex):
grid = CM1Grid

def __init__(self, ds, dataset_type="cm1"):
self.dataset_type = dataset_type
self.dataset = weakref.proxy(ds)
# for now, the index file is the dataset!
self.index_filename = self.dataset.parameter_filename
self.directory = os.path.dirname(self.index_filename)
        # float type for the simulation edges; must be float64 for now
self.float_type = np.float64
super(CM1Hierarchy, self).__init__(ds, dataset_type)

def _detect_output_fields(self):
# build list of on-disk fields for dataset_type 'cm1'
vnames = self.dataset.parameters["variable_names"]
self.field_list = [("cm1", vname) for vname in vnames]

def _count_grids(self):
# This needs to set self.num_grids
self.num_grids = 1

def _parse_index(self):
self.grid_left_edge[0][:] = self.ds.domain_left_edge[:]
self.grid_right_edge[0][:] = self.ds.domain_right_edge[:]
self.grid_dimensions[0][:] = self.ds.domain_dimensions[:]
self.grid_particle_count[0][0] = 0
self.grid_levels[0][0] = 1
self.max_level = 1

def _populate_grid_objects(self):
self.grids = np.empty(self.num_grids, dtype="object")
for i in range(self.num_grids):
g = self.grid(i, self, self.grid_levels.flat[i], self.grid_dimensions[i])
g._prepare_grid()
g._setup_dx()
self.grids[i] = g


class CM1Dataset(Dataset):
_index_class = CM1Hierarchy
_field_info_class = CM1FieldInfo

def __init__(
self,
filename,
dataset_type="cm1",
storage_filename=None,
units_override=None,
unit_system="mks",
):
self.fluid_types += ("cm1",)
self._handle = NetCDF4FileHandler(filename)
# refinement factor between a grid and its subgrid
self.refine_by = 1
super(CM1Dataset, self).__init__(
filename,
dataset_type,
units_override=units_override,
unit_system=unit_system,
)
self.storage_filename = storage_filename
self.filename = filename

def _setup_coordinate_handler(self):
# ensure correct ordering of axes so plots aren't rotated (z should always be
# on the vertical axis).
super(CM1Dataset, self)._setup_coordinate_handler()
self.coordinates._x_pairs = (("x", "y"), ("y", "x"), ("z", "x"))
self.coordinates._y_pairs = (("x", "z"), ("y", "z"), ("z", "y"))

def _set_code_unit_attributes(self):
# This is where quantities are created that represent the various
        # on-disk units. These are the quantities that currently need to be
        # set, along with examples of how to set them to standard values.
with self._handle.open_ds() as _handle:
length_unit = _handle.variables["xh"].units
self.length_unit = self.quan(1.0, length_unit)
self.mass_unit = self.quan(1.0, "kg")
self.time_unit = self.quan(1.0, "s")
self.velocity_unit = self.quan(1.0, "m/s")

def _parse_parameter_file(self):
# This needs to set up the following items. Note that these are all
# assumed to be in code units; domain_left_edge and domain_right_edge
# will be converted to YTArray automatically at a later time.
# This includes the cosmological parameters.
#
# self.unique_identifier <= unique identifier for the dataset
# being read (e.g., UUID or ST_CTIME)
self.unique_identifier = int(os.stat(self.parameter_filename)[stat.ST_CTIME])
self.parameters = {} # code-specific items
with self._handle.open_ds() as _handle:
# _handle here is a netcdf Dataset object, we need to parse some metadata
# for constructing our yt ds.

            # TODO: generalize this to be coordinate-variable-name agnostic in
            # order to make it useful for WRF or climate data. For now, we're
            # hard coding for CM1 specifically and have named the classes
            # appropriately. Additionally, we are only handling the
            # cell-centered grid ("xh", "yh", "zh") at present.
# The cell-centered grid contains scalar fields and interpolated velocities.
dims = [_handle.dimensions[i].size for i in ["xh", "yh", "zh"]]
xh, yh, zh = [_handle.variables[i][:] for i in ["xh", "yh", "zh"]]
self.domain_left_edge = np.array(
[xh.min(), yh.min(), zh.min()], dtype="float64"
)
self.domain_right_edge = np.array(
[xh.max(), yh.max(), zh.max()], dtype="float64"
)

# loop over the variable names in the netCDF file, record only those on the
# "zh","yh","xh" grid.
varnames = []
for key, var in _handle.variables.items():
if all(x in var.dimensions for x in ["time", "zh", "yh", "xh"]):
varnames.append(key)
self.parameters["variable_names"] = varnames
self.parameters["lofs_version"] = _handle.cm1_lofs_version
self.parameters["is_uniform"] = _handle.uniform_mesh
self.current_time = _handle.variables["time"][:][0]

            # record the dimension metadata. _handle.dimensions contains netcdf
            # objects, so we need to manually copy over attributes.
dim_info = OrderedDict()
for dim, meta in _handle.dimensions.items():
dim_info[dim] = meta.size
self.parameters["dimensions"] = dim_info

self.dimensionality = 3
self.domain_dimensions = np.array(dims, dtype="int64")
self.periodicity = (False, False, False)

# Set cosmological information to zero for non-cosmological.
self.cosmological_simulation = 0
self.current_redshift = 0.0
self.omega_lambda = 0.0
self.omega_matter = 0.0
self.hubble_constant = 0.0

@classmethod
def _is_valid(cls, filename, *args, **kwargs):
        # This accepts a filename or a set of arguments and returns True or
        # False depending on whether the file is of the type requested.

warn_netcdf(filename)
try:
nc4_file = NetCDF4FileHandler(filename)
with nc4_file.open_ds(keepweakref=True) as _handle:
is_cm1_lofs = hasattr(_handle, "cm1_lofs_version")
is_cm1 = hasattr(_handle, "cm1 version") # not a typo, it is a space...

                # ensure the coordinates of each variable array exist in the dataset
coords = _handle.dimensions # get the dataset wide coordinates
failed_vars = [] # list of failed variables
for var in _handle.variables: # iterate over the variables
vcoords = _handle[var].dimensions # get the dims for the variable
ncoords = len(vcoords) # number of coordinates in variable
# number of coordinates that pass for a variable
coordspassed = sum(vc in coords for vc in vcoords)
if coordspassed != ncoords:
failed_vars.append(var)

if failed_vars:
mylog.warning(
"Trying to load a cm1_lofs netcdf file but the "
"coordinates of the following fields do not match the "
f"coordinates of the dataset: {failed_vars}"
)
return False

if not is_cm1_lofs:
if is_cm1:
mylog.warning(
"It looks like you are trying to load a cm1 netcdf file, "
"but at present yt only supports cm1_lofs output. Until"
" support is added, you can likely use"
" yt.load_uniform_grid() to load your cm1 file manually."
)
return False
except (OSError, AttributeError, ImportError):
return False

return True
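
A minimal usage sketch for the new frontend follows. The file path is hypothetical, and it assumes a cm1_lofs output that carries a "thpert" variable on the cell-centered grid; any netCDF file with the cm1_lofs_version attribute and "xh"/"yh"/"zh" dimensions should pass _is_valid and route to CM1Dataset:

import yt

ds = yt.load("path/to/cm1_lofs_output.nc")  # hypothetical path

print(ds.domain_left_edge, ds.domain_right_edge)  # extents of xh/yh/zh
print(ds.parameters["variable_names"])  # on-disk fields on the time/zh/yh/xh grid
print(ds.current_time)  # first entry of the netCDF "time" variable

# _setup_coordinate_handler pins z to the vertical plot axis, so a slice
# along "y" shows x horizontally and z vertically.
slc = yt.SlicePlot(ds, "y", ("cm1", "thpert"))
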
63 changes: 63 additions & 0 deletions yt/frontends/nc4_cm1/fields.py
@@ -0,0 +1,63 @@
from yt.fields.field_info_container import FieldInfoContainer

# We need to specify which fields we might have in our dataset. The field info
# container subclass here will define which fields it knows about. There are
# optional methods on it that get called and can be subclassed.


class CM1FieldInfo(FieldInfoContainer):
known_other_fields = (
# Each entry here is of the form
# ( "name", ("units", ["fields", "to", "alias"], # "display_name")),
("uinterp", ("m/s", ["velocity_x"], None)),
("vinterp", ("m/s", ["velocity_y"], None)),
("winterp", ("m/s", ["velocity_z"], None)),
("u", ("m/s", ["velocity_x"], None)),
("v", ("m/s", ["velocity_y"], None)),
("w", ("m/s", ["velocity_z"], None)),
("hwin_sr", ("m/s", ["storm_relative_horizontal_wind_speed"], None)),
("windmag_sr", ("m/s", ["storm_relative_3D_wind_speed"], None)),
("hwin_gr", ("m/s", ["ground_relative_horizontal_wind_speed"], None)),
("thpert", ("K", ["potential_temperature_perturbation"], None)),
("thrhopert", ("K", ["density_potential_temperature_perturbation"], None)),
("prespert", ("hPa", ["presure_perturbation"], None)),
("rhopert", ("kg/m**3", ["density_perturbation"], None)),
("dbz", ("dB", ["simulated_reflectivity"], None)),
("qvpert", ("g/kg", ["water_vapor_mixing_ratio_perturbation"], None)),
("qc", ("g/kg", ["cloud_liquid_water_mixing_ratio"], None)),
("qr", ("g/kg", ["rain_mixing_ratio"], None)),
("qi", ("g/kg", ["cloud_ice_mixing_ratio"], None)),
("qs", ("g/kg", ["snow_mixing_ratio"], None)),
("qg", ("g/kg", ["graupel_or_hail_mixing_ratio"], None)),
("qcloud", ("g/kg", ["sum_of_cloud_water_and_cloud_ice_mixing_ratios"], None)),
("qprecip", ("g/kg", ["sum_of_rain_graupel_snow_mixing_ratios"], None)),
("nci", ("1/cm**3", ["number_concerntration_of_cloud_ice"], None)),
("ncr", ("1/cm**3", ["number_concentration_of_rain"], None)),
("ncs", ("1/cm**3", ["number_concentration_of_snow"], None)),
("ncg", ("1/cm**3", ["number_concentration_of_graupel_or_hail"], None)),
("xvort", ("1/s", ["vorticity_x"], None)),
("yvort", ("1/s", ["vorticity_y"], None)),
("zvort", ("1/s", ["vorticity_z"], None)),
("hvort", ("1/s", ["horizontal_vorticity_magnitude"], None)),
("vortmag", ("1/s", ["vorticity_magnitude"], None)),
("streamvort", ("1/s", ["streamwise_vorticity"], None)),
("khh", ("m**2/s", ["khh"], None)),
("khv", ("m**2/s", ["khv"], None)),
("kmh", ("m**2/s", ["kmh"], None)),
("kmv", ("m**2/s", ["kmv"], None)),
)

known_particle_fields = (
# Identical form to above
# ( "name", ("units", ["fields", "to", "alias"], # "display_name")),
)

def setup_fluid_fields(self):
# Here we do anything that might need info about the dataset.
# You can use self.alias, self.add_output_field (for on-disk fields)
# and self.add_field (for derived fields).
pass

def setup_particle_fields(self, ptype):
super(CM1FieldInfo, self).setup_particle_fields(ptype)
# This will get called for every particle type.
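
A short sketch of how these aliases surface to the user, reusing the hypothetical path from above and assuming yt's usual exposure of known_other_fields aliases under the "gas" field type (standard FieldInfoContainer behavior, not something this diff changes):

import yt

ds = yt.load("path/to/cm1_lofs_output.nc")  # hypothetical path
ad = ds.all_data()

u_disk = ad[("cm1", "uinterp")]  # on-disk field, read by CM1IOHandler
u_alias = ad[("gas", "velocity_x")]  # same data via the alias table above
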
62 changes: 62 additions & 0 deletions yt/frontends/nc4_cm1/io.py
@@ -0,0 +1,62 @@
import numpy as np

from yt.utilities.file_handler import NetCDF4FileHandler
from yt.utilities.io_handler import BaseIOHandler


class CM1IOHandler(BaseIOHandler):
_particle_reader = False
_dataset_type = "cm1"

def __init__(self, ds):
self.filename = ds.filename
self._handle = NetCDF4FileHandler(self.filename)
super(CM1IOHandler, self).__init__(ds)

def _read_particle_coords(self, chunks, ptf):
# This needs to *yield* a series of tuples of (ptype, (x, y, z)).
# chunks is a list of chunks, and ptf is a dict where the keys are
# ptypes and the values are lists of fields.
raise NotImplementedError

def _read_particle_fields(self, chunks, ptf, selector):
# This gets called after the arrays have been allocated. It needs to
# yield ((ptype, field), data) where data is the masked results of
# reading ptype, field and applying the selector to the data read in.
# Selector objects have a .select_points(x,y,z) that returns a mask, so
# you need to do your masking here.
raise NotImplementedError

def _read_fluid_selection(self, chunks, selector, fields, size):
# This needs to allocate a set of arrays inside a dictionary, where the
# keys are the (ftype, fname) tuples and the values are arrays that
# have been masked using whatever selector method is appropriate. The
# dict gets returned at the end and it should be flat, with selected
# data. Note that if you're reading grid data, you might need to
# special-case a grid selector object.
# Also note that "chunks" is a generator for multiple chunks, each of
# which contains a list of grids. The returned numpy arrays should be
# in 64-bit float and contiguous along the z direction. Therefore, for
# a C-like input array with the dimension [x][y][z] or a
# Fortran-like input array with the dimension (z,y,x), a matrix
# transpose is required (e.g., using np_array.transpose() or
# np_array.swapaxes(0,2)).

        data = {}
        with self._handle.open_ds() as ds:
            # "chunks" is a generator, so materialize it once in order to
            # iterate over it for every requested field.
            chunks = list(chunks)
            for field in fields:
                data[field] = np.empty(size, dtype="float64")
                offset = 0  # reset the fill offset for each field's output array
                for chunk in chunks:
                    for grid in chunk.objs:
                        variable = ds.variables[field[1]][:][0]
                        values = np.squeeze(variable.T)
                        offset += grid.select(selector, values, data[field], offset)
        return data

def _read_chunk_data(self, chunk, fields):
# This reads the data from a single chunk without doing any selection,
# and is only used for caching data that might be used by multiple
# different selectors later. For instance, this can speed up ghost zone
# computation.
pass
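
The axis-order convention documented in _read_fluid_selection can be checked with a small numpy sketch (the dimension sizes here are made up):

import numpy as np

# CM1 netCDF variables are stored (time, zh, yh, xh); yt's grid selectors
# expect (x, y, z) data contiguous along z, hence the transpose of the
# first time slice, mirroring ds.variables[name][:][0].T in the handler.
nt, nz, ny, nx = 1, 4, 3, 2
var = np.arange(nt * nz * ny * nx, dtype="float64").reshape(nt, nz, ny, nx)

values = np.squeeze(var[0].T)
assert values.shape == (nx, ny, nz)
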