Skip to content

Commit

Permalink
Merge pull request #112 from NCAR/develop
Browse files Browse the repository at this point in the history
New readers from MM satellite group
  • Loading branch information
zmoon authored Jun 13, 2024
2 parents 1489eaf + 899e530 commit 91e2ea4
Show file tree
Hide file tree
Showing 11 changed files with 765 additions and 7 deletions.
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
._.DS_Store

docs/api/

tests/data/MOP*.he5
tests/data/TROPOMI*.nc
tests/data/OMPS-*.h5

# Default GitHub .gitignore for Python below:

Expand Down
2 changes: 2 additions & 0 deletions environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,15 @@ dependencies:
#
# optional
- h5netcdf
- h5py
- joblib
- lxml
- pyhdf!=0.11.3
- requests
- timezonefinder
#
# test
- filelock
- pytest
- pytest-xdist
#
Expand Down
7 changes: 6 additions & 1 deletion monetio/models/_wrfchem_mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,12 @@ def open_mfdataset(
if "pm25_om" in list_calc_sum:
dset = add_lazy_om_pm25(dset, dict_sum)

dset = dset.reset_coords(["XTIME", "datetime"], drop=True)
# fix to work with Jerrold's wrfchem files
if "XTIME" in list(dset.coords):
dset = dset.reset_coords(["XTIME", "datetime"], drop=True)
else:
dset = dset.reset_coords("datetime", drop=True)

if not surf_only_nc:
# Reset more variables
dset = dset.rename(
Expand Down
10 changes: 8 additions & 2 deletions monetio/sat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from . import (
_gridded_eos_mm,
_modis_l2_mm,
_mopitt_l3_mm,
_omps_l3_mm,
_omps_nadir_mm,
_tropomi_l2_no2_mm,
goes,
modis_ornl,
nesdis_edr_viirs,
Expand All @@ -12,12 +15,15 @@
__all__ = [
"_gridded_eos_mm",
"_modis_l2_mm",
"_mopitt_l3_mm",
"_omps_l3_mm",
"_omps_nadir_mm",
"_tropomi_l2_no2_mm",
"goes",
"modis_ornl",
"nesdis_edr_viirs",
"nesdis_eps_viirs",
"nesdis_frp",
"modis_ornl",
"goes",
]

__name__ = "sat"
4 changes: 1 addition & 3 deletions monetio/sat/_modis_l2_mm.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,7 @@ def read_mfdataset(fnames, variable_dict, debug=False):
"""
if debug:
logging_level = logging.DEBUG
else:
logging_level = logging.INFO
logging.basicConfig(stream=sys.stdout, level=logging_level)
logging.basicConfig(stream=sys.stdout, level=logging_level)

files = sorted(glob(fnames))

Expand Down
263 changes: 263 additions & 0 deletions monetio/sat/_mopitt_l3_mm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
"""MOPITT gridded data file reader.
History:
- updated 2024-02 meb
* read multiple variables into a DataSet instead of individual variables
* add functions to combine profiles and surface as in rrb's code
- updated 2023-08 rrb
* Added units
- updated 2022-10 rrb
* Dataset instead of DataArray
- created 2021-12 rrb
"""
import glob
from pathlib import Path

import pandas as pd
import xarray as xr


def get_start_time(filename):
    """Read the start time from a MOPITT level 3 HDF-EOS file.

    Parameters
    ----------
    filename : str
        Path to the file.

    Returns
    -------
    pandas.Timestamp or pandas.NaT
        File start time, or ``pd.NaT`` if the ``StartTime`` attribute
        is absent.
    """
    import h5py

    structure = "/HDFEOS/ADDITIONAL/FILE_ATTRIBUTES"

    # Context manager guarantees the HDF5 file is closed even if an
    # exception is raised while reading the attributes (the previous
    # open/close pair leaked the handle on error).
    with h5py.File(filename, "r") as inFile:
        grp = inFile[structure]
        startTimeBytes = grp.attrs.get("StartTime", default=None)  # one-element float array

    if startTimeBytes is None:
        return pd.NaT

    # StartTime is seconds since the 1993-01-01 epoch (TAI93 convention).
    return pd.to_datetime(
        startTimeBytes[0],
        unit="s",
        origin="1993-01-01 00:00:00",
    )


def load_variable(filename, varname):
    """Open one variable from a MOPITT gridded HDF-EOS file.

    Masks data that is missing (-9999.0 fill value turns into ``np.nan``).

    Parameters
    ----------
    filename
        Path to the file.
    varname : str
        The variable to load from the MOPITT file.

    Returns
    -------
    xarray.Dataset
        Dataset with the single variable `varname`.

    Raises
    ------
    ValueError
        If `varname` is not one of the supported variable names.
    """
    import h5py

    # 2D or 3D variables to choose from
    variable_dict = {
        "column": "/HDFEOS/GRIDS/MOP03/Data Fields/RetrievedCOTotalColumnDay",
        "apriori_col": "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOTotalColumnDay",
        "apriori_surf": "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOSurfaceMixingRatioDay",
        "pressure_surf": "/HDFEOS/GRIDS/MOP03/Data Fields/SurfacePressureDay",
        "ak_col": "/HDFEOS/GRIDS/MOP03/Data Fields/TotalColumnAveragingKernelDay",
        "apriori_prof": "/HDFEOS/GRIDS/MOP03/Data Fields/APrioriCOMixingRatioProfileDay",
    }
    # Validate before opening the file so a bad name cannot leak an
    # open HDF5 handle (previously this raised after the file was opened
    # and before it was closed).
    if varname not in variable_dict:
        raise ValueError(f"Variable {varname!r} not in {sorted(variable_dict)}.")

    ds = xr.Dataset()

    # Load the dimensions and the requested field; the context manager
    # guarantees the file is closed even if a read raises.
    with h5py.File(filename, mode="r") as he5_load:
        lat = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Latitude"][:]
        lon = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Longitude"][:]
        alt = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Pressure2"][:]
        alt_short = he5_load["/HDFEOS/GRIDS/MOP03/Data Fields/Pressure"][:]
        data_loaded = he5_load[variable_dict[varname]][:]

    # Create xarray DataArray; note MOPITT L3 stores arrays as (lon, lat[, alt])
    if varname == "column":
        ds[varname] = xr.DataArray(
            data_loaded,
            dims=["lon", "lat"],
            coords=[lon, lat],
            attrs={
                "long_name": "Retrieved CO Total Column",
                "units": "molec/cm^2",
            },
        )
    elif varname in {"apriori_col", "apriori_surf", "pressure_surf"}:
        ds[varname] = xr.DataArray(data_loaded, dims=["lon", "lat"], coords=[lon, lat])
    elif varname == "ak_col":
        ds[varname] = xr.DataArray(
            data_loaded,
            dims=["lon", "lat", "alt"],
            coords=[lon, lat, alt],
            attrs={
                "long_name": "Total Column Averaging Kernel",
                "units": "mol/(cm^2 log(VMR))",
            },
        )
    elif varname == "apriori_prof":
        # Profile uses the shorter pressure coordinate ("Pressure", not "Pressure2")
        ds[varname] = xr.DataArray(
            data_loaded,
            dims=["lon", "lat", "alt"],
            coords=[lon, lat, alt_short],
            attrs={
                "long_name": "A Priori CO Mixing Ratio Profile",
                "units": "ppbv",
            },
        )
    else:
        raise AssertionError(f"Variable {varname!r} in variable dict but not accounted for.")

    # missing value -> nan
    ds[varname] = ds[varname].where(ds[varname] != -9999.0)

    return ds


def _add_pressure_variables(dataset):
    """Set up a 3-D ``pressure`` variable on `dataset`.

    Builds a per-grid-cell pressure profile from the fixed retrieval levels,
    substitutes the actual surface pressure for the lowest level, blanks out
    levels below the true surface, and finally converts level-edge pressures
    to layer-center pressures.

    Parameters
    ----------
    dataset : xarray.Dataset
        Should have the 3D averaging kernel field (``ak_col``) and surface
        pressure field (``pressure_surf``); assumed dims are
        (time, lon, lat, alt) with 10 ``alt`` levels.

    Returns
    -------
    xarray.Dataset
        Same dataset with a ``pressure`` variable added (modified in place).
    """
    import numpy as np

    # broadcast 10 levels 1000 to 100 hPa repeated everywhere
    # (the first broadcast result is unused; only the expanded coord matters)
    dummy, press_dummy_arr = xr.broadcast(dataset["ak_col"], dataset["ak_col"].alt)
    # Replace level with 1000 hPa (index 9, the lowest level) with the actual
    # surface pressure
    dataset["pressure"] = press_dummy_arr.copy()
    dataset["pressure"][:, :, :, 9] = dataset["pressure_surf"].values

    # Correct for where MOPITT surface pressure <900 hPa
    # difference between layer pressure and surface pressure;
    # top level (index 0) is given a large positive diff so it always survives
    # the `where` mask below
    diff = xr.full_like(dataset["pressure"], np.nan)
    diff[:, :, :, 0] = 1000
    diff[:, :, :, 1:] = (
        dataset["pressure_surf"].values[:, :, :, None] - dataset["pressure"][:, :, :, :9].values
    )
    # add fill values (NaN) below true surface, i.e. where level pressure
    # exceeds the surface pressure
    dataset["pressure"] = dataset["pressure"].where(diff > 0)
    # replace lowest pressure with surface pressure; broadcast happens in background
    # NOTE(review): the `where` keeps pressure_surf where 0 < diff < 100 (the
    # level just above the surface) and the existing level pressure elsewhere —
    # confirm against rrb's original code
    dataset["pressure"].values = (
        dataset["pressure_surf"].where((diff > 0) & (diff < 100), dataset["pressure"]).values
    )

    # Center Pressure: midpoint between adjacent level pressures;
    # topmost center is fixed at 87.0 hPa
    dummy = dataset["pressure"].copy()
    dummy[:, :, :, 0] = 87.0
    for z in range(1, 10):
        dummy[:, :, :, z] = (
            dataset["pressure"][:, :, :, z]
            - (dataset["pressure"][:, :, :, z] - dataset["pressure"][:, :, :, z - 1]) / 2
        )
    dataset["pressure"] = dummy

    return dataset


def _combine_apriori(dataset):
    """Merge the surface a priori value into the a priori profile.

    MOPITT surface values are stored separately to profile values because
    of the floating surface pressure. So, for the smoothing calculations,
    need to combine surface and profile: the surface value is written into
    the lowest profile level, and levels below the true surface are masked.

    Parameters
    ----------
    dataset : xarray.Dataset
        Must contain ``apriori_prof``, ``apriori_surf``, ``pressure`` and
        ``pressure_surf`` (i.e. ``_add_pressure_variables`` has already run).

    Returns
    -------
    xarray.Dataset
        Same dataset with ``apriori_prof`` combined (modified in place).
    """
    import numpy as np

    # Lowest profile level (index -1) gets the separately-stored surface value
    dataset["apriori_prof"][:, :, :, -1] = dataset["apriori_surf"].values

    # As with pressure, correct for where MOPITT surface pressure <900 hPa
    # difference between layer pressure and surface pressure
    # (same diff construction as in _add_pressure_variables; top level is
    # forced positive so it is never masked)
    diff = xr.full_like(dataset["pressure"], np.nan)
    diff[:, :, :, 0] = 1000
    diff[:, :, :, 1:] = (
        dataset["pressure_surf"].values[:, :, :, None] - dataset["pressure"][:, :, :, :9].values
    )
    # add fill values (NaN) below true surface
    dataset["apriori_prof"] = dataset["apriori_prof"].where(diff > 0)
    # replace lowest pressure with surface pressure; broadcast happens in background
    # NOTE(review): keeps the surface a priori where 0 < diff < 100 and the
    # profile value elsewhere — mirrors the pressure handling above
    dataset["apriori_prof"].values = (
        dataset["apriori_surf"].where((diff > 0) & (diff < 100), dataset["apriori_prof"]).values
    )

    return dataset


def open_dataset(files, varnames):
    """Loop through files to open the MOPITT level 3 data for variable(s) `varnames`.

    Parameters
    ----------
    files : str or Path or list
        Input file path(s).
        If :class:`str`, shell-style wildcards (e.g. ``*``) will be expanded.
    varnames : str or list of str
        The variable(s) to load from the MOPITT file.

    Returns
    -------
    xarray.Dataset
        All files concatenated along a new ``time`` dimension (one step per
        file, taken from each file's start time). If both ``apriori_prof``
        and ``pressure_surf`` are requested, a 3-D ``pressure`` variable is
        added and the surface/profile a priori fields are combined.
    """
    if isinstance(files, str):
        filelist = sorted(glob.glob(files, recursive=False))
    elif isinstance(files, Path):
        filelist = [files]
    else:
        filelist = files  # assume list

    if isinstance(varnames, str):
        varnames = [varnames]

    datasets = []
    for filename in filelist:
        print(filename)
        # The start time is a per-file property: read it once per file,
        # not once per variable (each call reopens the HDF file).
        time = get_start_time(filename)
        file_varset = []
        for varname in varnames:
            data = load_variable(filename, varname)
            data = data.expand_dims(axis=0, time=[time])
            file_varset.append(data)

        # merge variables for file into single dataset
        data = xr.merge(file_varset)
        if "apriori_prof" in varnames and "pressure_surf" in varnames:
            # add 3-d pressure field
            data = _add_pressure_variables(data)
            # combine surface and rest of profile into single variable
            data = _combine_apriori(data)

        datasets.append(data)

    return xr.concat(datasets, dim="time")
Loading

0 comments on commit 91e2ea4

Please sign in to comment.