-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feat: move netcdf_io utility from DAS (#42)
* add netcdf write functions * add netcdf libraries to linter and pytest GitHub actions
- Loading branch information
1 parent
7242857
commit 1b8a18c
Showing
5 changed files
with
252 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
"""Utilities for reading NetCDF files""" | ||
# ---------------------------------------------------------------------------------- | ||
# Created on Mon Feb 13 2023 | ||
# | ||
# Copyright (c) 2023 Regents of the University of Colorado. All rights reserved. (1) | ||
# Copyright (c) 2023 Colorado State University. All rights reserved. (2) | ||
# | ||
# Contributors: | ||
# Geary J Layne (1) | ||
# Mackenzie Grimes (2) | ||
# | ||
# ---------------------------------------------------------------------------------- | ||
|
||
import logging
import os
from typing import Any, List, Protocol, Tuple

from netCDF4 import Dataset  # pylint: disable=no-name-in-module
import h5netcdf as h5nc
import numpy as np
|
||
# Module-level logger named after this module, per standard logging convention.
logger = logging.getLogger(__name__)
|
||
|
||
# cSpell:ignore ncattrs, getncattr, maskandscale
class HasNcAttr(Protocol):
    """Protocol that allows retrieving attributes"""
    def ncattrs(self) -> List[str]:
        """Gives access to list of keys

        Returns:
            List[str]: Keys names for the attributes
        """

    def getncattr(self, key: str) -> Any:
        """Gives access to value for specific key

        Args:
            key (str): Name of attribute to be retrieved

        Returns:
            Any: The requested attribute, of unknown type
        """
|
||
|
||
def read_netcdf_global_attrs(filepath: str) -> dict:
    """Read the global attributes from a Netcdf file

    Args:
        filepath (str): Path to Netcdf file

    Returns:
        dict: Global attributes as dictionary
    """
    # Use a context manager so the file handle is closed deterministically
    # instead of leaking until garbage collection.
    with Dataset(filepath) as dataset:
        return _read_attrs(dataset)
|
||
|
||
def read_netcdf(filepath: str, use_h5_lib: bool = False) -> Tuple[dict, np.ndarray]:
    """Reads DAS Netcdf file.

    Args:
        filepath (str): Path to DAS Netcdf file
        use_h5_lib (bool): if True, python library h5netcdf will be used to do file I/O.
            If False, netCDF4 library will be used. Default is False (netCDF4 will be used).

    Returns:
        Tuple[dict, np.ndarray]: Global attributes and data
    """
    if use_h5_lib:
        with h5nc.File(filepath, 'r') as nc_file:
            grid = nc_file.variables['grid'][:]
            # Copy the attributes into a plain dict: the raw `.attrs` proxy is
            # bound to the open HDF5 file and becomes unusable once the `with`
            # block closes it. This also matches the documented dict return.
            return dict(nc_file.attrs), grid

    # otherwise, use netcdf4 library (default)
    with Dataset(filepath) as dataset:
        # disable automatic masking/scaling so raw stored values are returned
        dataset.set_auto_maskandscale(False)
        grid = dataset.variables['grid'][:]

        global_attrs = _read_attrs(dataset)
    return global_attrs, grid
|
||
|
||
def write_netcdf(attrs: dict, grid: np.ndarray, filepath: str, use_h5_lib = False) -> str:
    """Store data and attributes to a Netcdf4 file

    Args:
        attrs (dict): Attribute relative to the data to be written
        grid (np.array): Numpy array of data
        filepath (str): String representation of where to write the file
        use_h5_lib: (bool): if True, python library h5netcdf will be used to do file I/O.
            If False, netCDF4 library will be used. Default is False (netCDF4 will be used).

    Returns:
        str: The location that data was written to
    """
    _make_dirs(filepath)
    logger.debug('Writing data to: %s', filepath)

    # grid is indexed (y, x), so shape unpacks as rows-then-columns
    num_rows, num_cols = grid.shape

    if use_h5_lib:
        with h5nc.File(filepath, 'w') as file:
            # h5netcdf declares dimensions through a dict assignment
            file.dimensions = {'x': num_cols, 'y': num_rows}

            data_var = file.create_variable('grid', ('y', 'x'), 'f4')
            data_var[:] = grid

            for name, value in attrs.items():
                file.attrs[name] = value

    else:
        # otherwise, write file using netCDF4 library (default)
        with Dataset(filepath, 'w', format='NETCDF4') as dataset:
            dataset.createDimension('x', num_cols)
            dataset.createDimension('y', num_rows)

            data_var = dataset.createVariable('grid', 'f4', ('y', 'x'))
            data_var[:] = grid

            # NOTE(review): values are stringified here but stored as-is in the
            # h5netcdf branch above — confirm whether this asymmetry is intended.
            for name, value in attrs.items():
                setattr(dataset, name, str(value))

    return filepath
|
||
|
||
def _make_dirs(filename: str):
    """Ensure the parent directory of `filename` exists, creating it if needed."""
    parent_dir = os.path.dirname(os.path.abspath(filename))
    os.makedirs(parent_dir, exist_ok=True)
|
||
|
||
def _read_attrs(has_nc_attr: HasNcAttr) -> dict:
    """Collect every attribute exposed by a HasNcAttr object into a plain dict."""
    attrs = {}
    for name in has_nc_attr.ncattrs():
        attrs[name] = has_nc_attr.getncattr(name)
    return attrs
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
"""Test suite for netcdf_io.py""" | ||
# -------------------------------------------------------------------------------- | ||
# Created on Mon May 1 2023 | ||
# | ||
# Copyright (c) 2023 Regents of the University of Colorado. All rights reserved. | ||
# | ||
# Contributors: | ||
# Geary J Layne | ||
# | ||
# -------------------------------------------------------------------------------- | ||
# pylint: disable=missing-function-docstring,redefined-outer-name,protected-access,unused-argument | ||
|
||
import os | ||
from typing import Dict, Tuple | ||
|
||
from pytest import fixture, approx | ||
from numpy import ndarray | ||
|
||
from idsse.common.netcdf_io import read_netcdf, read_netcdf_global_attrs, write_netcdf | ||
|
||
|
||
# test data
# Path to a sample NetCDF grid file bundled under the tests' resources directory.
EXAMPLE_NETCDF_FILEPATH = os.path.join(
    os.path.dirname(__file__),
    'resources',
    'gridstore55657865.nc'
)

# Expected global attributes stored in the sample file above.
EXAMPLE_ATTRIBUTES = {
    'product': 'NBM.AWS.GRIB',
    'field': 'TEMP',
    'valid_dt': '2022-11-11 17:00:00+00:00',
    'issue_dt': '2022-11-11 14:00:00+00:00',
    'task': 'data_task',
    'region': 'CO',
    'units': 'Fahrenheit',
    'proj_name': 'NBM',
    'proj_spec': '+proj=lcc +lat_0=25.0 +lon_0=-95.0 +lat_1=25.0 +r=6371200',
    'grid_spec': '+dx=2539.703 +dy=2539.703 +w=2345 +h=1597 +lat_ll=19.229 +lon_ll=-126.2766',
    'data_key': 'NBM.AWS.GRIB:CO:TEMP::Fahrenheit::20221111140000.20221111170000'
}

# Product key attached to written files in the round-trip tests below.
# NOTE(review): the 'valid' timestamp here (20221112000000) differs from
# EXAMPLE_ATTRIBUTES['valid_dt'] (17:00Z) — presumably intentional, but confirm.
EXAMPLE_PROD_KEY = (
    'product:NBM.AWS.GRIB-field:TEMP-issue:20221111140000-valid:20221112000000-units:Fahrenheit'
)
|
||
|
||
# pytest fixtures | ||
@fixture
def example_netcdf_data() -> Tuple[Dict[str, any], ndarray]:
    """Read the bundled example NetCDF file and return its (attrs, grid) pair."""
    return read_netcdf(EXAMPLE_NETCDF_FILEPATH)
|
||
|
||
# tests | ||
def test_read_netcdf_global_attrs():
    """Global attributes of the example file match EXAMPLE_ATTRIBUTES exactly."""
    attrs = read_netcdf_global_attrs(EXAMPLE_NETCDF_FILEPATH)

    assert attrs == EXAMPLE_ATTRIBUTES
    assert len(attrs) == 11
|
||
|
||
def test_read_netcdf(example_netcdf_data: Tuple[Dict[str, any], ndarray]):
    """Grid data and attributes are read correctly from the example file."""
    attrs, grid = example_netcdf_data

    # grid is indexed (y, x): 1597 rows by 2345 columns, so the shape unpacks
    # as y-then-x. The previous locals (x_dimensions, y_dimensions) were
    # misleadingly swapped; the actual indices checked are unchanged.
    assert grid.shape == (1597, 2345)
    y_size, x_size = grid.shape

    assert grid[0][0] == approx(72.98599)
    assert grid[round(y_size / 2)][round(x_size / 2)] == approx(12.505991)
    assert grid[y_size - 1][x_size - 1] == approx(2.4259913)

    assert attrs == EXAMPLE_ATTRIBUTES
|
||
|
||
def test_read_and_write_netcdf(example_netcdf_data: Tuple[Dict[str, any], ndarray]):
    """Round trip: data written with write_netcdf reads back unchanged."""
    target_path = './tmp/test_netcdf_file.nc'
    # cleanup existing test file if needed
    if os.path.exists(target_path):
        os.remove(target_path)

    attrs, grid = example_netcdf_data
    attrs['prodKey'] = EXAMPLE_PROD_KEY
    attrs['prodSource'] = attrs['product']

    # verify write_netcdf functionality
    result_path = write_netcdf(attrs, grid, target_path)
    assert result_path == target_path
    assert os.path.exists(target_path)

    # reading the file back should reproduce attributes and data
    reread_attrs, reread_grid = read_netcdf(result_path)
    assert reread_attrs == attrs
    assert reread_grid[123][321] == grid[123][321]

    # cleanup created netcdf file
    os.remove(target_path)
|
||
|
||
def test_read_and_write_netcdf_with_h5nc(example_netcdf_data: Tuple[Dict[str, any], ndarray]):
    """Round trip through the h5netcdf backend (use_h5_lib=True)."""
    target_path = './tmp/test_netcdf_h5_file.nc'
    # cleanup existing test file if needed
    if os.path.exists(target_path):
        os.remove(target_path)

    attrs, grid = example_netcdf_data
    attrs['prodKey'] = EXAMPLE_PROD_KEY
    attrs['prodSource'] = attrs['product']

    # verify write_netcdf functionality with the h5netcdf backend
    result_path = write_netcdf(attrs, grid, target_path, use_h5_lib=True)
    assert result_path == target_path

    # Don't verify h5 attrs for now; they are some custom h5py type and aren't easy to access
    _, reread_grid = read_netcdf(result_path, use_h5_lib=True)
    assert reread_grid[123][321] == grid[123][321]

    # cleanup created netcdf h5 file
    os.remove(target_path)