diff --git a/mpas_analysis/ocean/meridional_overturning_circulation.py b/mpas_analysis/ocean/meridional_overturning_circulation.py index 71c74a5ab..8cf626c2a 100644 --- a/mpas_analysis/ocean/meridional_overturning_circulation.py +++ b/mpas_analysis/ocean/meridional_overturning_circulation.py @@ -36,6 +36,8 @@ from ..shared.analysis_task import setup_task from ..shared.climatology import climatology +from ..shared.constants import constants + def moc_streamfunction(config): # {{{ """ @@ -304,7 +306,10 @@ def _compute_moc_climo_postprocess(config, runStreams, variableMap, calendar, dictClimo['endYearClimo'] = endYear # Compute annual climatology - annualClimatology = ds.mean('Time') + annualClimatology = \ + climatology.compute_climatology(ds, + constants.monthDictionary['ANN'], + calendar) # Convert to numpy arrays # (can result in a memory error for large array size) diff --git a/mpas_analysis/ocean/nino34_index.py b/mpas_analysis/ocean/nino34_index.py index 402e9703e..c0516be8d 100644 --- a/mpas_analysis/ocean/nino34_index.py +++ b/mpas_analysis/ocean/nino34_index.py @@ -7,8 +7,9 @@ Last Modified ------------- -03/23/2017 +03/29/2017 """ + import xarray as xr import pandas as pd import numpy as np @@ -51,7 +52,7 @@ def nino34_index(config, streamMap=None, variableMap=None): # {{{ Last Modified ------------- - 03/23/2017 + 03/29/2017 """ print ' Load SST data...' @@ -87,9 +88,9 @@ def nino34_index(config, streamMap=None, variableMap=None): # {{{ startDate=startDate, endDate=endDate) - SSTregions = ds.avgSurfaceTemperature print ' Compute NINO3.4 index...' - nino34 = compute_nino34_index(SSTregions[:, regionIndex], calendar) + regionSST = ds.avgSurfaceTemperature.isel(nOceanRegions=regionIndex) + nino34 = compute_nino34_index(regionSST, calendar) print ' Computing NINO3.4 power spectra...' f, spectra, conf99, conf95, redNoise = compute_nino34_spectra(nino34) @@ -122,7 +123,7 @@ def compute_nino34_index(regionSST, calendar): # {{{ Parameters ---------- - regionSST : xarray.dataArray object + regionSST : xarray.DataArray object values of SST in the nino region calendar: {'gregorian', 'gregorian_noleap'} @@ -138,18 +139,22 @@ def compute_nino34_index(regionSST, calendar): # {{{ Last Modified ------------- - 03/23/2017 + 03/30/2017 """ if not isinstance(regionSST, xr.core.dataarray.DataArray): raise ValueError('regionSST should be an xarray DataArray') + + # add 'month' data array so we can group by month below. + regionSST = climatology.add_months_and_days_in_month(regionSST, calendar) + # Compute monthly average and anomaly of climatology of SST - monthlyClimatology = climatology.compute_monthly_climatology(regionSST, - calendar) - anomalySST = regionSST.groupby('month') - monthlyClimatology + monthlyClimatology = climatology.compute_monthly_climatology(regionSST) + + anomaly = regionSST.groupby('month') - monthlyClimatology - return _running_mean(anomalySST.to_pandas()) # }}} + return _running_mean(anomaly.to_pandas()) # }}} def compute_nino34_spectra(nino34Index): # {{{ diff --git a/mpas_analysis/ocean/ocean_modelvsobs.py b/mpas_analysis/ocean/ocean_modelvsobs.py index 0a4dce558..0b363a5c9 100644 --- a/mpas_analysis/ocean/ocean_modelvsobs.py +++ b/mpas_analysis/ocean/ocean_modelvsobs.py @@ -9,7 +9,7 @@ Last Modified ------------- -03/23/2017 +03/29/2017 """ import xarray as xr @@ -29,13 +29,14 @@ from ..shared.generalized_reader.generalized_reader \ import open_multifile_dataset -from ..shared.timekeeping.utility import get_simulation_start_time, \ - days_to_datetime +from ..shared.timekeeping.utility import get_simulation_start_time from ..shared.climatology import climatology from ..shared.analysis_task import setup_task +from ..shared.mpas_xarray import mpas_xarray + def ocn_modelvsobs(config, field): @@ -56,7 +57,7 @@ def ocn_modelvsobs(config, field): Last Modified ------------- - 03/23/2017 + 03/29/2017 """ # perform common setup for the task @@ -123,6 +124,8 @@ def ocn_modelvsobs(config, field): iselvals = None + obsFieldName = 'mld_dt_mean' + if buildObsClimatologies: # Load MLD observational data dsObs = xr.open_mfdataset(obsFileName) @@ -133,17 +136,18 @@ def ocn_modelvsobs(config, field): # Rename the dimensions to be consistent with other obs. data sets dsObs.rename({'month': 'calmonth', 'lat': 'latCoord', 'lon': 'lonCoord'}, inplace=True) - dsObs.rename({'iMONTH': 'month', 'iLAT': 'lat', 'iLON': 'lon'}, + dsObs.rename({'iMONTH': 'Time', 'iLAT': 'lat', 'iLON': 'lon'}, inplace=True) # set the coordinates now that the dimensions have the same names dsObs.coords['lat'] = dsObs['latCoord'] dsObs.coords['lon'] = dsObs['lonCoord'] - dsObs.coords['month'] = dsObs['calmonth'] + dsObs.coords['Time'] = dsObs['calmonth'] + dsObs.coords['month'] = ('Time', np.array(dsObs['calmonth'], int)) + dsObs = mpas_xarray.subset_variables(dsObs, [obsFieldName, + 'month']) # Reorder dataset for consistence with other obs. data sets - dsObs = dsObs.transpose('month', 'lat', 'lon') - - obsFieldName = 'mld_dt_mean' + dsObs = dsObs.transpose('Time', 'lat', 'lon') # Set appropriate MLD figure labels observationTitleLabel = \ @@ -169,11 +173,10 @@ def ocn_modelvsobs(config, field): if buildObsClimatologies: dsObs = xr.open_mfdataset(obsFileName) - - dsTimeSlice = dsObs.sel(time=slice(timeStart, timeEnd)) - monthlyClimatology = dsTimeSlice.groupby('time.month').mean('time') - - dsObs = monthlyClimatology.transpose('month', 'lat', 'lon') + dsObs.rename({'time': 'Time'}, inplace=True) + dsObs = dsObs.transpose('Time', 'lat', 'lon') + dsObs = dsObs.sel(Time=slice(timeStart, timeEnd)) + dsObs.coords['month'] = dsObs['Time.month'] obsFieldName = 'SST' @@ -194,16 +197,10 @@ def ocn_modelvsobs(config, field): if buildObsClimatologies: dsObs = xr.open_mfdataset(obsFileName) - dsTimeSlice = dsObs.sel(time=slice(timeStart, timeEnd)) - - # The following line converts from DASK to numpy to supress an odd - # warning that doesn't influence the figure output - dsTimeSlice.SSS.values - - monthlyClimatology = dsTimeSlice.groupby('time.month').mean('time') - - # Rename the observation data for code compactness - dsObs = monthlyClimatology.transpose('month', 'lat', 'lon') + dsObs.rename({'time': 'Time'}, inplace=True) + dsObs = dsObs.transpose('Time', 'lat', 'lon') + dsObs = dsObs.sel(Time=slice(timeStart, timeEnd)) + dsObs.coords['month'] = dsObs['Time.month'] obsFieldName = 'SSS' @@ -225,8 +222,6 @@ def ocn_modelvsobs(config, field): changed, startYear, endYear = \ climatology.update_start_end_year(ds, config, calendar) - monthlyClimatology = climatology.compute_monthly_climatology(ds, calendar) - mpasMappingFileName = climatology.write_mpas_mapping_file( config=config, meshFileName=restartFileName) @@ -253,8 +248,8 @@ def ocn_modelvsobs(config, field): monthNames=months) if overwriteMpasClimatology or not os.path.exists(climatologyFileName): - seasonalClimatology = climatology.compute_seasonal_climatology( - monthlyClimatology, monthValues, field) + seasonalClimatology = climatology.compute_climatology( + ds, monthValues, calendar) # write out the climatology so we can interpolate it with # interpolate.remap seasonalClimatology.to_netcdf(climatologyFileName) @@ -283,8 +278,8 @@ def ocn_modelvsobs(config, field): if (overwriteObsClimatology or (not os.path.exists(climatologyFileName) and not os.path.exists(regriddedFileName))): - seasonalClimatology = climatology.compute_seasonal_climatology( - dsObs, monthValues, obsFieldName) + seasonalClimatology = climatology.compute_climatology( + dsObs, monthValues) # Either we want to overwite files or neither the climatology # nor its regridded counterpart exist. Write out the # climatology so we can interpolate it with interpolate.remap diff --git a/mpas_analysis/sea_ice/modelvsobs.py b/mpas_analysis/sea_ice/modelvsobs.py index 779b5e34f..95de80f43 100644 --- a/mpas_analysis/sea_ice/modelvsobs.py +++ b/mpas_analysis/sea_ice/modelvsobs.py @@ -8,7 +8,7 @@ Last Modified ------------- -03/23/2017 +03/29/2017 """ import os @@ -34,8 +34,7 @@ from ..shared.generalized_reader.generalized_reader \ import open_multifile_dataset -from ..shared.timekeeping.utility import get_simulation_start_time, \ - days_to_datetime +from ..shared.timekeeping.utility import get_simulation_start_time from .utility import setup_sea_ice_task @@ -65,7 +64,7 @@ def seaice_modelvsobs(config, streamMap=None, variableMap=None): Last Modified ------------- - 03/23/2017 + 03/29/2017 """ # perform common setup for the task @@ -128,20 +127,15 @@ def seaice_modelvsobs(config, streamMap=None, variableMap=None): changed, startYear, endYear = \ climatology.update_start_end_year(ds, config, calendar) - monthlyClimatology = climatology.compute_monthly_climatology(ds, calendar) - mpasMappingFileName = climatology.write_mpas_mapping_file( config=config, meshFileName=restartFileName) - _compute_and_plot_concentration(config, monthlyClimatology, - mpasMappingFileName) + _compute_and_plot_concentration(config, ds, mpasMappingFileName, calendar) - _compute_and_plot_thickness(config, monthlyClimatology, - mpasMappingFileName) + _compute_and_plot_thickness(config, ds, mpasMappingFileName, calendar) -def _compute_and_plot_concentration(config, monthlyClimatology, - mpasMappingFileName): +def _compute_and_plot_concentration(config, ds, mpasMappingFileName, calendar): """ Given a config file, monthly climatology on the mpas grid, and the data necessary to perform horizontal interpolation to a comparison grid, @@ -152,18 +146,22 @@ def _compute_and_plot_concentration(config, monthlyClimatology, ---------- config : an instance of MpasConfigParser - monthlyClimatology : an xarray data set containing a monthly climatology + ds : ``xarray.Dataset`` object + an xarray data set from which to compute climatologies mpasMappingFileName : The name of a mapping file used to perform interpolation of MPAS model results + calendar: ``{'gregorian', 'gregorian_noleap'}`` + The name of one of the calendars supported by MPAS cores + Authors ------- Xylar Asay-Davis, Milena Veneziani Last Modified ------------- - 03/03/2017 + 03/29/2017 """ print " Make ice concentration plots..." @@ -230,8 +228,8 @@ def _compute_and_plot_concentration(config, monthlyClimatology, monthNames=months) if overwriteMpasClimatology or not os.path.exists(climatologyFileName): - seasonalClimatology = climatology.compute_seasonal_climatology( - monthlyClimatology, monthValues, field) + seasonalClimatology = climatology.compute_climatology( + ds, monthValues, calendar) # write out the climatology so we can interpolate it with # interpolate.remap seasonalClimatology.to_netcdf(climatologyFileName) @@ -329,8 +327,7 @@ def _compute_and_plot_concentration(config, monthlyClimatology, cbarlabel="fraction") -def _compute_and_plot_thickness(config, monthlyClimatology, - mpasMappingFileName): +def _compute_and_plot_thickness(config, ds, mpasMappingFileName, calendar): """ Given a config file, monthly climatology on the mpas grid, and the data necessary to perform horizontal interpolation to a comparison grid, @@ -341,18 +338,22 @@ def _compute_and_plot_thickness(config, monthlyClimatology, ---------- config : an instance of MpasConfigParser - monthlyClimatology : an xarray data set containing a monthly climatology + ds : ``xarray.Dataset`` object + an xarray data set from which to compute climatologies mpasMappingFileName : The name of a mapping file used to perform interpolation of MPAS model results + calendar: ``{'gregorian', 'gregorian_noleap'}`` + The name of one of the calendars supported by MPAS cores + Authors ------- Xylar Asay-Davis, Milena Veneziani Last Modified ------------- - 03/03/2017 + 03/29/2017 """ print " Make ice thickness plots..." @@ -411,8 +412,8 @@ def _compute_and_plot_thickness(config, monthlyClimatology, monthNames=months) if overwriteMpasClimatology or not os.path.exists(climatologyFileName): - seasonalClimatology = climatology.compute_seasonal_climatology( - monthlyClimatology, monthValues, field) + seasonalClimatology = climatology.compute_climatology( + ds, monthValues, calendar) # write out the climatology so we can interpolate it with # interpolate.remap. Set _FillValue so ncremap doesn't produce # an error diff --git a/mpas_analysis/shared/climatology/climatology.py b/mpas_analysis/shared/climatology/climatology.py index ced990b6c..cbcb747e6 100644 --- a/mpas_analysis/shared/climatology/climatology.py +++ b/mpas_analysis/shared/climatology/climatology.py @@ -14,9 +14,8 @@ import os import numpy import netCDF4 -from warnings import warn +import warnings -from ..mpas_xarray import mpas_xarray from ..constants import constants from ..timekeeping.utility import days_to_datetime @@ -386,25 +385,30 @@ def get_observation_climatology_file_names(config, fieldName, monthNames, return (climatologyFileName, regriddedFileName) -def compute_monthly_climatology(ds, calendar): +def compute_monthly_climatology(ds, calendar=None): """ - Compute a monthly climatology data set from a data set with Time expressed - as days since 0001-01-01 with the given calendar. + Compute monthly climatologies from a data set. The mean is weighted but + the number of days in each month of the data set, ignoring values masked + out with NaNs. If the month coordinate is not present, a data array + ``month`` will be added based on ``Time`` and the provided calendar. Parameters ---------- - ds : instance of xarray.DataSet - A data set with a 'Time' coordinate expressed as days since - 0001-01-01 + ds : ``xarray.Dataset`` or ``xarray.DataArray`` object + A data set with a ``Time`` coordinate expressed as days since + 0001-01-01 or ``month`` coordinate - calendar: {'gregorian', 'gregorian_noleap'} - The name of one of the calendars supported by MPAS cores + calendar: ``{'gregorian', 'gregorian_noleap'}``, optional + The name of one of the calendars supported by MPAS cores, used to + determine ``month`` from ``Time`` coordinate, so must be supplied if + ``ds`` does not already have a ``month`` coordinate or data array Returns ------- - monthlyClimatology : instance of xarray.DataSet - A data set with a new 'month' coordinate, - containing monthly climatologies of all variables in ds + climatology : object of same type as ``ds`` + A data set without the ``'Time'`` coordinate containing the mean + of ds over all months in monthValues, weighted by the number of days + in each month. Authors ------- @@ -412,35 +416,50 @@ def compute_monthly_climatology(ds, calendar): Last Modified ------------- - 02/25/2017 + 03/30/2017 """ - months = [date.month for date in days_to_datetime(ds.Time, - calendar=calendar)] - ds.coords['month'] = ('Time', months) - monthlyClimatology = ds.groupby('month').mean('Time') + def compute_one_month_climatology(ds): + monthValues = list(ds.month.values) + return compute_climatology(ds, monthValues, calendar) + + if 'month' not in ds.coords or 'daysInMonth' not in ds.coords: + ds = add_months_and_days_in_month(ds, calendar) + + monthlyClimatology = \ + ds.groupby('month').apply(compute_one_month_climatology) + return monthlyClimatology -def compute_seasonal_climatology(monthlyClimatology, monthValues, - variableName): +def compute_climatology(ds, monthValues, calendar=None): """ - Given a monthly climatology, compute a seasonal climatology weighted by - the number of days in each month (on the no-leap-year calendar). + Compute a monthly, seasonal or annual climatology data set from a data + set. The mean is weighted but the number of days in each month of + the data set, ignoring values masked out with NaNs. If the month + coordinate is not present, a data array ``month`` will be added based + on ``Time`` and the provided calendar. Parameters ---------- - monthlyClimatology : instance of xarray.DataSet - A data set containing a monthly climatology + ds : ``xarray.Dataset`` or ``xarray.DataArray`` object + A data set with a ``Time`` coordinate expressed as days since + 0001-01-01 or ``month`` coordinate monthValues : int or array-like of ints A single month or an array of months to be averaged together - before interpolation. + + calendar: ``{'gregorian', 'gregorian_noleap'}``, optional + The name of one of the calendars supported by MPAS cores, used to + determine ``month`` from ``Time`` coordinate, so must be supplied if + ``ds`` does not already have a ``month`` coordinate or data array Returns ------- - seasonalClimatology : instance of xarray.DataSet - A data set containing the seasonal climatology + climatology : object of same type as ``ds`` + A data set without the ``'Time'`` coordinate containing the mean + of ds over all months in monthValues, weighted by the number of days + in each month. Authors ------- @@ -448,32 +467,22 @@ def compute_seasonal_climatology(monthlyClimatology, monthValues, Last Modified ------------- - 02/27/2017 + 03/30/2017 """ - # select only the desired field from the obs. data set - monthlyClimatology = mpas_xarray.subset_variables( - monthlyClimatology, variableList=[variableName]) + if ('month' not in ds.coords or 'daysInMonth' not in ds.coords): + ds = add_months_and_days_in_month(ds, calendar) - # Make a DataArray with the number of days in each month - daysInMonth = xr.DataArray(numpy.array(constants.daysInMonth, float), - coords=[monthlyClimatology.month], - name='daysInMonth') + mask = xr.zeros_like(ds.month, bool) - daysInMonth = daysInMonth.sel(month=monthValues) - monthlyClimatology = monthlyClimatology.sel(month=monthValues) + for month in monthValues: + mask = xr.ufuncs.logical_or(mask, ds.month == month) - varArray = monthlyClimatology[variableName] - mask = xr.DataArray(~numpy.isnan(varArray.values), - coords=varArray.coords, - name='mask') - seasonalClimatology = (monthlyClimatology * daysInMonth).sum( - dim='month', keep_attrs=True) + climatologyMonths = ds.where(mask, drop=True) - days = (mask * daysInMonth).sum(dim='month') - seasonalClimatology /= days.where(days > 0.) + climatology = _compute_masked_mean(climatologyMonths) - return seasonalClimatology + return climatology def update_start_end_year(ds, config, calendar): @@ -483,7 +492,7 @@ def update_start_end_year(ds, config, calendar): Parameters ---------- - ds : instance of xarray.DataSet + ds : instance of xarray.Dataset A data set from which start and end years will be determined config : instance of MpasAnalysisConfigParser @@ -515,12 +524,9 @@ def update_start_end_year(ds, config, calendar): endYear = days_to_datetime(ds.Time.max().values, calendar=calendar).year changed = False if startYear != requestedStartYear or endYear != requestedEndYear: - warn("climatology start and/or end year different from requested\n" - "requestd: {:04d}-{:04d}\n" - "actual: {:04d}-{:04d}\n".format(requestedStartYear, - requestedEndYear, - startYear, - endYear)) + warnings.warn("climatology start and/or end year different from requested\n" + "requestd: {:04d}-{:04d}\n" + "actual: {:04d}-{:04d}\n".format(requestedStartYear, requestedEndYear, startYear, endYear)) config.set('climatology', 'startYear', str(startYear)) config.set('climatology', 'endYear', str(endYear)) changed = True @@ -528,6 +534,95 @@ def update_start_end_year(ds, config, calendar): return changed, startYear, endYear +def add_months_and_days_in_month(ds, calendar): + ''' + Add ``months`` and ``daysInMonth`` as data arrays in ``ds``. The number + of days in each month of ``ds`` is computed either using the ``startTime`` + and ``endTime`` if available or assuming ``gregorian_noleap`` calendar and + ignoring leap years. + + Parameters + ---------- + ds : ``xarray.Dataset`` or ``xarray.DataArray`` object + A data set with a ``Time`` coordinate expressed as days since + 0001-01-01 + + calendar: ``{'gregorian', 'gregorian_noleap'}`` + The name of one of the calendars supported by MPAS cores, used to + determine ``month`` from ``Time`` coordinate + + Returns + ------- + ds : object of same type as ``ds`` + The data set with ``month`` and ``daysInMonth`` data arrays added (if + not already present) + + Authors + ------- + Xylar Asay-Davis + + Last Modified + ------------- + 03/29/2017 + """ ''' + + ds = ds.copy() + + if 'month' not in ds.coords: + if calendar is None: + raise ValueError('calendar must be provided if month coordinate is not in ds') + months = [date.month for date in days_to_datetime(ds.Time, + calendar=calendar)] + + ds.coords['month'] = ('Time', months) + + if 'daysInMonth' not in ds.coords: + if 'startTime' in ds.coords and 'endTime' in ds.coords: + ds.coords['daysInMonth'] = ds.endTime - ds.startTime + else: + if calendar == 'gregorian': + warnings.warn('The MPAS run used the Gregorian calendar but does not appear to have\n' + 'supplied start and end times. Climatologies will be computed with\n' + 'month durations ignoring leap years.') + # TODO: support leap years if calendar is 'gregorian' + daysInMonth = numpy.array([constants.daysInMonth[month-1] for + month in ds.month.values], float) + ds.coords['daysInMonth'] = ('Time', daysInMonth) + + return ds + + +def _compute_masked_mean(ds): + ''' + Compute the time average of data set, masked out where the variables in ds + are NaN and weighting by the number of days used to compute each monthly + mean time in ds. + ''' + def ds_to_weights(ds): + # make an identical data set to ds but replacing all data arrays with + # nonnull applied to that data array + weights = ds.copy(deep=True) + if isinstance(ds, xr.core.dataarray.DataArray): + weights = ds.notnull() + elif isinstance(ds, xr.core.dataset.Dataset): + for var in ds.data_vars: + weights[var] = ds[var].notnull() + else: + raise TypeError('ds must be an instance of either xarray.Dataset or xarray.DataArray.') + + return weights + + dsWeightedSum = (ds * ds.daysInMonth).sum(dim='Time', keep_attrs=True) + + weights = ds_to_weights(ds) + + weightSum = (weights * ds.daysInMonth).sum(dim='Time') + + timeMean = dsWeightedSum / weightSum.where(weightSum > 0.) + + return timeMean + + def _get_comparison_lat_lon(comparisonLatRes, comparisonLonRes): ''' Returns the lat and lon arrays defining the corners of the comparison diff --git a/mpas_analysis/shared/mpas_xarray/mpas_xarray.py b/mpas_analysis/shared/mpas_xarray/mpas_xarray.py index be68316ab..3183b5cc5 100644 --- a/mpas_analysis/shared/mpas_xarray/mpas_xarray.py +++ b/mpas_analysis/shared/mpas_xarray/mpas_xarray.py @@ -482,59 +482,63 @@ def _parse_dataset_time(ds, inTimeVariableName, calendar, ends = dsEnd[outTimeVariableName].values # replace the time in starts with the mean of starts and ends - dsStart[outTimeVariableName] = [starts[i] + - (ends[i] - starts[i])/2 - for i in range(len(starts))] - - return dsStart - - # The remainder of the function is for cases when there is just one time - # variable (either because we're recursively calling the function or - # because we're not averaging). The contents of the time variable is - # expected to be either a string (|S64) or a float (meaning days since - # start of the simulation). - - timeVar = ds[inTimeVariableName] - - if timeVar.dtype == '|S64': - # this is an array of date strings like 'xtime' - # convert to string - timeStrings = [''.join(xtime).strip() for xtime in timeVar.values] - days = string_to_days_since_date(dateString=timeStrings, - referenceDate=referenceDate, - calendar=calendar) + dsOut = dsStart.copy() - elif timeVar.dtype == 'float64': - # this array contains floating-point days like 'daysSinceStartOfSim' + dsOut.coords['startTime'] = (outTimeVariableName, starts) + dsOut.coords['endTime'] = (outTimeVariableName, ends) - if simulationStartTime is None: - raise ValueError('MPAS time variable {} appears to be a number of ' - 'days since start of sim but ' - 'simulationStartTime was not ' - 'supplied.'.format(inTimeVariableName)) + dsOut.coords[outTimeVariableName] = (outTimeVariableName, + [starts[i] + + (ends[i] - starts[i])/2 + for i in range(len(starts))]) - if (string_to_datetime(referenceDate) == - string_to_datetime(simulationStartTime)): - days = timeVar.values - else: - # a conversion may be required - dates = days_to_datetime(days=timeVar.values, - referenceDate=simulationStartTime, - calendar=calendar) - days = datetime_to_days(dates=dates, - referenceDate=referenceDate, - calendar=calendar) - - elif timeVar.dtype == 'timedelta64[ns]': - raise TypeError("timeVar of unsupported type {}. This is likely " - "because xarray.open_dataset was called with " - "decode_times=True.".format(timeVar.dtype)) else: - raise TypeError("timeVar of unsupported type {}".format( - timeVar.dtype)) - dsOut = ds.copy() - dsOut.coords[outTimeVariableName] = (outTimeVariableName, days) + # there is just one time variable (either because we're recursively + # calling the function or because we're not averaging). + + # The contents of the time variable is expected to be either a string + # (|S64) or a float (meaning days since start of the simulation). + + timeVar = ds[inTimeVariableName] + + if timeVar.dtype == '|S64': + # this is an array of date strings like 'xtime' + # convert to string + timeStrings = [''.join(xtime).strip() for xtime in timeVar.values] + days = string_to_days_since_date(dateString=timeStrings, + referenceDate=referenceDate, + calendar=calendar) + + elif timeVar.dtype == 'float64': + # this array contains floating-point days like + # 'daysSinceStartOfSim' + + if simulationStartTime is None: + raise ValueError('MPAS time variable {} appears to be a number of days since start \n' + 'of sim but simulationStartTime was not supplied.'.format(inTimeVariableName)) + + if (string_to_datetime(referenceDate) == + string_to_datetime(simulationStartTime)): + days = timeVar.values + else: + # a conversion may be required + dates = days_to_datetime(days=timeVar.values, + referenceDate=simulationStartTime, + calendar=calendar) + days = datetime_to_days(dates=dates, + referenceDate=referenceDate, + calendar=calendar) + + elif timeVar.dtype == 'timedelta64[ns]': + raise TypeError('timeVar of unsupported type {}. This is likely because xarray.open_dataset \n' + 'was called with decode_times=True, which can mangle MPAS times.'.format(timeVar.dtype)) + else: + raise TypeError("timeVar of unsupported type {}".format( + timeVar.dtype)) + + dsOut = ds.copy() + dsOut.coords[outTimeVariableName] = (outTimeVariableName, days) return dsOut # }}} diff --git a/mpas_analysis/test/__init__.py b/mpas_analysis/test/__init__.py index 19c5381c7..8580ad7a5 100644 --- a/mpas_analysis/test/__init__.py +++ b/mpas_analysis/test/__init__.py @@ -74,6 +74,10 @@ def assertArrayEqual(self, a1, a2): def assertApproxEqual(self, a1, a2, rtol=1e-5, atol=1e-8): assert np.isclose(a1, a2, rtol=rtol, atol=atol) + @requires_numpy + def assertArrayApproxEqual(self, a1, a2, rtol=1e-5, atol=1e-8): + assert np.all(np.isclose(a1, a2, rtol=rtol, atol=atol)) + @contextmanager def assertWarns(self, message): with warnings.catch_warnings(record=True) as w: diff --git a/mpas_analysis/test/test_climatology.py b/mpas_analysis/test/test_climatology.py new file mode 100644 index 000000000..c06e3d475 --- /dev/null +++ b/mpas_analysis/test/test_climatology.py @@ -0,0 +1,250 @@ +""" +Unit test infrastructure for climatologies. + +Xylar Asay-Davis +04/03/2017 +""" + +import pytest +import tempfile +import shutil +import os +import numpy +import xarray + +from mpas_analysis.test import TestCase, loaddatadir +from mpas_analysis.shared.generalized_reader.generalized_reader \ + import open_multifile_dataset +from mpas_analysis.configuration.MpasAnalysisConfigParser \ + import MpasAnalysisConfigParser +from mpas_analysis.shared.climatology import climatology +from mpas_analysis.shared.constants import constants + +@pytest.mark.usefixtures("loaddatadir") +class TestClimatology(TestCase): + + def setUp(self): + # Create a temporary directory + self.test_dir = tempfile.mkdtemp() + + def tearDown(self): + # Remove the directory after the test + shutil.rmtree(self.test_dir) + + def setup_config(self, autocloseFileLimitFraction=0.5, + maxChunkSize=10000): + config = MpasAnalysisConfigParser() + config.add_section('input') + config.set('input', 'autocloseFileLimitFraction', + str(autocloseFileLimitFraction)) + config.set('input', 'maxChunkSize', str(maxChunkSize)) + config.set('input', 'mpasMeshName', 'QU240') + + config.add_section('output') + config.set('output', 'baseDirectory', self.test_dir) + config.set('output', 'mappingSubdirectory', '.') + config.set('output', 'mpasClimatologySubdirectory', 'clim/mpas') + config.set('output', 'mpasRegriddedClimSubdirectory', + 'clim/mpas/regrid') + + config.add_section('climatology') + config.set('climatology', 'startYear', '2') + config.set('climatology', 'endYear', '2') + config.set('climatology', 'comparisonLatResolution', '0.5') + config.set('climatology', 'comparisonLonResolution', '0.5') + + config.set('climatology', 'overwriteMapping', 'False') + config.set('climatology', 'overwriteMpasClimatology', 'False') + config.set('climatology', 'mpasInterpolationMethod', 'bilinear') + + config.add_section('oceanObservations') + config.set('oceanObservations', 'interpolationMethod', 'bilinear') + config.set('oceanObservations', 'climatologySubdirectory', 'clim/obs') + config.set('oceanObservations', 'regriddedClimSubdirectory', + 'clim/obs/regrid') + + return config + + def test_write_mpas_mapping_file(self): + config = self.setup_config() + mpasMeshFileName = '{}/mpasMesh.nc'.format(self.datadir) + climatology.write_mpas_mapping_file(config, mpasMeshFileName) + + mappingFileName = '{}/map_QU240_to_0.5x0.5degree_' \ + 'bilinear.nc'.format(self.test_dir) + assert os.path.exists(mappingFileName) + + mappingFileName = '{}/mapping.nc'.format(self.test_dir) + config.set('climatology', 'mpasMappingFile', mappingFileName) + climatology.write_mpas_mapping_file(config, mpasMeshFileName) + assert os.path.exists(mappingFileName) + + def test_write_observations_mapping_file(self): + config = self.setup_config() + gridFileName = '{}/obsGrid.nc'.format(self.datadir) + componentName = 'ocean' + fieldName = 'sst' + climatology.write_observations_mapping_file(config, + componentName, + fieldName, + gridFileName, + latVarName='lat', + lonVarName='lon') + + mappingFileName = '{}/map_obs_{}_1.0x1.0degree_to_0.5x0.5degree_' \ + 'bilinear.nc'.format(self.test_dir, fieldName) + assert os.path.exists(mappingFileName) + + mappingFileName = '{}/mapping.nc'.format(self.test_dir) + config.set('oceanObservations', 'sstClimatologyMappingFile', + mappingFileName) + climatology.write_observations_mapping_file(config, + componentName, + fieldName, + gridFileName, + latVarName='lat', + lonVarName='lon') + assert os.path.exists(mappingFileName) + + def test_get_mpas_climatology_file_names(self): + config = self.setup_config() + fieldName = 'sst' + monthNames = 'JFM' + (climatologyFileName, regriddedFileName) = \ + climatology.get_mpas_climatology_file_names(config, fieldName, + monthNames) + expectedClimatologyFileName = '{}/clim/mpas/sst_QU240_JFM_' \ + 'years0002-0002.nc'.format(self.test_dir) + self.assertEqual(climatologyFileName, expectedClimatologyFileName) + + expectedRegriddedFileName = '{}/clim/mpas/regrid/sst_QU240_to_' \ + '0.5x0.5degree_JFM_' \ + 'years0002-0002.nc'.format(self.test_dir) + self.assertEqual(regriddedFileName, expectedRegriddedFileName) + + def test_get_observation_climatology_file_names(self): + config = self.setup_config() + fieldName = 'sst' + monthNames = 'JFM' + gridFileName = '{}/obsGrid.nc'.format(self.datadir) + componentName = 'ocean' + (climatologyFileName, regriddedFileName) = \ + climatology.get_observation_climatology_file_names( + config, fieldName, monthNames, componentName, gridFileName, + latVarName='lat', lonVarName='lon') + expectedClimatologyFileName = '{}/clim/obs/sst_1.0x1.0degree_' \ + 'JFM.nc'.format(self.test_dir) + self.assertEqual(climatologyFileName, expectedClimatologyFileName) + + expectedRegriddedFileName = '{}/clim/obs/regrid/sst_1.0x1.0degree_' \ + 'to_0.5x0.5degree_' \ + 'JFM.nc'.format(self.test_dir) + self.assertEqual(regriddedFileName, expectedRegriddedFileName) + + def open_test_ds(self, config, calendar): + fileNames = ['{}/timeSeries.0002-{:02d}-01.nc'.format(self.datadir, + month) + for month in [1, 2, 3]] + + variableMap = {'mld': ['timeMonthly_avg_tThreshMLD'], + 'Time': [['xtime_startMonthly', 'xtime_endMonthly']]} + variableList = ['mld'] + + ds = open_multifile_dataset( + fileNames=fileNames, + calendar=calendar, + config=config, + timeVariableName='Time', + variableList=variableList, + variableMap=variableMap) + + assert(len(ds.Time) == 3) + return ds + + def test_compute_climatology(self): + config = self.setup_config() + calendar = 'gregorian_noleap' + ds = self.open_test_ds(config, calendar) + + assert('month' not in ds.coords.keys()) + assert('daysInMonth' not in ds.coords.keys()) + + # test add_months_and_days_in_month + ds = climatology.add_months_and_days_in_month(ds, calendar) + + self.assertArrayEqual(ds.month.values, [1, 2, 3]) + self.assertArrayEqual(numpy.round(ds.daysInMonth.values), [31, 28, 31]) + + # test compute_climatology on a data set + monthNames = 'JFM' + monthValues = constants.monthDictionary[monthNames] + dsClimatology = climatology.compute_climatology(ds, monthValues, + calendar) + + assert('Time' not in dsClimatology.dims.keys()) + + self.assertEqual(dsClimatology.data_vars.keys(), ['mld']) + + climFileName = '{}/refSeasonalClim.nc'.format(self.datadir) + refClimatology = xarray.open_dataset(climFileName) + self.assertArrayApproxEqual(dsClimatology.mld.values, + refClimatology.mld.values) + + # test compute_climatology on a data array + mldClimatology = climatology.compute_climatology(ds.mld, monthValues, + calendar) + + assert('Time' not in mldClimatology.dims) + + self.assertArrayApproxEqual(dsClimatology.mld.values, + mldClimatology.values) + + # for good measure... + self.assertArrayApproxEqual(mldClimatology.values, + refClimatology.mld.values) + + def test_compute_monthly_climatology(self): + config = self.setup_config() + calendar = 'gregorian_noleap' + ds = self.open_test_ds(config, calendar) + + monthlyClimatology = climatology.compute_monthly_climatology(ds, + calendar) + + assert(len(monthlyClimatology.month) == 3) + + self.assertEqual(monthlyClimatology.data_vars.keys(), ['mld']) + + climFileName = '{}/refMonthlyClim.nc'.format(self.datadir) + refClimatology = xarray.open_dataset(climFileName) + + self.assertArrayApproxEqual(monthlyClimatology.mld.values, + refClimatology.mld.values) + + self.assertArrayApproxEqual(monthlyClimatology.month.values, + refClimatology.month.values) + + def test_update_start_end_year(self): + config = self.setup_config() + calendar = 'gregorian_noleap' + ds = self.open_test_ds(config, calendar) + + changed, startYear, endYear = \ + climatology.update_start_end_year(ds, config, calendar) + + assert(not changed) + assert(startYear == 2) + assert(endYear == 2) + + config.set('climatology', 'endYear', '50') + ds = self.open_test_ds(config, calendar) + + with self.assertWarns('climatology start and/or end year different from requested'): + changed, startYear, endYear = \ + climatology.update_start_end_year(ds, config, calendar) + + assert(changed) + assert(startYear == 2) + assert(endYear == 2) + +# vim: foldmethod=marker ai ts=4 sts=4 et sw=4 ft=python diff --git a/mpas_analysis/test/test_climatology/mpasMesh.nc b/mpas_analysis/test/test_climatology/mpasMesh.nc new file mode 120000 index 000000000..880a52c2e --- /dev/null +++ b/mpas_analysis/test/test_climatology/mpasMesh.nc @@ -0,0 +1 @@ +../test_interpolate/mpasMesh.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_climatology/obsGrid.nc b/mpas_analysis/test/test_climatology/obsGrid.nc new file mode 100644 index 000000000..e595400d1 Binary files /dev/null and b/mpas_analysis/test/test_climatology/obsGrid.nc differ diff --git a/mpas_analysis/test/test_climatology/refMonthlyClim.nc b/mpas_analysis/test/test_climatology/refMonthlyClim.nc new file mode 100644 index 000000000..6f0b8e589 Binary files /dev/null and b/mpas_analysis/test/test_climatology/refMonthlyClim.nc differ diff --git a/mpas_analysis/test/test_climatology/refSeasonalClim.nc b/mpas_analysis/test/test_climatology/refSeasonalClim.nc new file mode 100644 index 000000000..cdf54bac9 Binary files /dev/null and b/mpas_analysis/test/test_climatology/refSeasonalClim.nc differ diff --git a/mpas_analysis/test/test_climatology/timeSeries.0002-01-01.nc b/mpas_analysis/test/test_climatology/timeSeries.0002-01-01.nc new file mode 120000 index 000000000..6c56dfeb4 --- /dev/null +++ b/mpas_analysis/test/test_climatology/timeSeries.0002-01-01.nc @@ -0,0 +1 @@ +../test_interpolate/timeSeries.0002-01-01.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_climatology/timeSeries.0002-02-01.nc b/mpas_analysis/test/test_climatology/timeSeries.0002-02-01.nc new file mode 120000 index 000000000..7d127a423 --- /dev/null +++ b/mpas_analysis/test/test_climatology/timeSeries.0002-02-01.nc @@ -0,0 +1 @@ +../test_interpolate/timeSeries.0002-02-01.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_climatology/timeSeries.0002-03-01.nc b/mpas_analysis/test/test_climatology/timeSeries.0002-03-01.nc new file mode 120000 index 000000000..a7e210ef8 --- /dev/null +++ b/mpas_analysis/test/test_climatology/timeSeries.0002-03-01.nc @@ -0,0 +1 @@ +../test_interpolate/timeSeries.0002-03-01.nc \ No newline at end of file diff --git a/mpas_analysis/test/test_generalized_reader.py b/mpas_analysis/test/test_generalized_reader.py index 351bb07dd..10a3c489d 100644 --- a/mpas_analysis/test/test_generalized_reader.py +++ b/mpas_analysis/test/test_generalized_reader.py @@ -139,7 +139,7 @@ def test_open_process_climatology(self): variableList=['mld'], variableMap=variableMap) - # note, the asserts for autoclose below are only guaranteed + # note, the asserts for autoclose below are only guaranteed # to work immediately following call to open_multifile_dataset assert hasattr(ds, '_autoclose'), \ '`autoclose` not defined for dataset'