Skip to content

Commit

Permalink
comments added to guide new developers
Browse files Browse the repository at this point in the history
  • Loading branch information
nlensse1 committed Jun 27, 2024
1 parent 7aeec90 commit cac6c2b
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 12 deletions.
26 changes: 17 additions & 9 deletions podaac/subsetter/gpm_cleanup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
"""
Module designed for mapping the dimensions in GPM. Phony dimensions are changed
to nscan, nbin, nfreq by using the DimensionNames variable attribute
Module designed for mapping the dimensions in GPM, and changing the calendar time type
to seconds since 1980-01-06T00:00:00Z.
Problem: each variable will have unique phony dims. When subset_with_bbox
is called, each unique group will have to have common dimensions.
Solution: phony dimensions are changed to nscan, nbin, nfreq and will have the same name for each
unique group that will be subsetted.
nscan, nbin, and nfreq are named in the variable attributes.
"""

import datetime
Expand All @@ -14,9 +19,10 @@ def compute_new_time_data(time_group, nc_dataset):
create a time variable, timeMidScan, that is present in other
GPM collections but not the ENV collections.
"""
# set the time unit for GPM
# set the time unit for GPM - this is the case for all GPM collections
time_unit_out = "seconds since 1980-01-06 00:00:00"
# conver to a float, seconds variable
# convert to a float variable, seconds since the above date
# this list will be a new time variable
new_time_list = [date2num(datetime.datetime(
nc_dataset[time_group+'__Year'][:][i],
nc_dataset[time_group+'__Month'][:][i],
Expand All @@ -37,7 +43,7 @@ def change_var_dims(nc_dataset, variables=None, time_name="__timeMidScan"):
dimensions to have the name in the DimensionName attribute rather than phony_dim
"""
var_list = list(nc_dataset.variables.keys())
# loop through variable list to avoid netcdf4 runtime error
# loop through the entire variable list to avoid netcdf4 runtime error
for var_name in var_list:
# GPM will always need to be cleaned up via netCDF
# generalizing coordinate variables in netCDF file to speed variable subsetting up
Expand All @@ -57,14 +63,16 @@ def change_var_dims(nc_dataset, variables=None, time_name="__timeMidScan"):
dim_prefix = var_name.split('__')[1]
# new dimension name
new_dim = '__'+dim_prefix+'__'+dim
length = var.shape[count]
# get dim size of the newly created variable
dim_size = var.shape[count]
# check if the dimension name created has already been created in the dataset
if new_dim not in dim_dict:
# create the new dimension
nc_dataset.createDimension(new_dim, length)
dim_dict[new_dim] = length
nc_dataset.createDimension(new_dim, dim_size)
dim_dict[new_dim] = dim_size
# utilized from Dimension Cleanup module
attrs_contents = {}
# new variable that will point to the updated dimensions
new_mapped_var = {}
# if the variable has attributes, get the attributes to then be copied to the new variable
if len(var.ncattrs()) > 0:
Expand Down Expand Up @@ -99,7 +107,7 @@ def change_var_dims(nc_dataset, variables=None, time_name="__timeMidScan"):
comp_args = {"zlib": True, "complevel": 1}
nc_dataset.createVariable(new_time_var_name, 'f8', var_dims, **comp_args)
nc_dataset.variables[new_time_var_name].setncattr('unit', time_unit)
# copy the data in
# copy the new time list into the new time variable
nc_dataset.variables[new_time_var_name][:] = time_data

return nc_dataset
4 changes: 2 additions & 2 deletions podaac/subsetter/group_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def recombine_grouped_datasets(datasets: List[xr.Dataset], output_file: str, sta
Name of the output file to write the resulting NetCDF file to.
TODO: add docstring and type hint for `start_date` parameter.
"""

#
base_dataset = nc.Dataset(output_file, mode='w')
for dataset in datasets:
group_lst = []
Expand Down Expand Up @@ -179,7 +179,7 @@ def _rename_variables(dataset: xr.Dataset, base_dataset: nc.Dataset, start_date,
comp_args = {"zlib": True, "complevel": 1}

var_data = variable.data

# create variable based upon the original data type
if variable.dtype in [object, '|S27']:
comp_args = {"zlib": False, "complevel": 1}
var_group.createVariable(new_var_name, 'S4', var_dims, fill_value=fill_value, **comp_args)
Expand Down
4 changes: 3 additions & 1 deletion podaac/subsetter/time_converting.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@

def get_start_date(instrument_type):
"""
returns the start date based on the instrument type
returns the start date based on the instrument type. Times will then be
converted to seconds since this start date.
"""
if instrument_type in ['OMI', 'MLS']:
start_date = datetime.datetime.strptime("1993-01-01T00:00:00.00", "%Y-%m-%dT%H:%M:%S.%f")
Expand Down Expand Up @@ -51,6 +52,7 @@ def convert_to_datetime(dataset: xr.Dataset, time_vars: list, instrument_type: s
dataset[var].values = date_time_array.astype("datetime64[ns]") + dataset[var].astype('timedelta64[s]').values
continue
# if there isn't a start_date, get it from the UTC variable
# subtract the seconds value from the first UTC time to recover the epoch the seconds count from
utc_var_name = subset.compute_utc_name(dataset)
if utc_var_name:
start_seconds = dataset[var].values[0]
Expand Down

0 comments on commit cac6c2b

Please sign in to comment.