Skip to content

Commit

Permalink
Speed up functions that use time dimension
Browse files Browse the repository at this point in the history
  • Loading branch information
bouweandela committed Sep 3, 2022
1 parent ed81a3c commit 8041191
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 83 deletions.
18 changes: 11 additions & 7 deletions esmvalcore/cmor/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def _get_next_month(month, year):

def _get_time_bounds(time, freq):
bounds = []
for step, point in enumerate(time.points):
month = time.cell(step).point.month
year = time.cell(step).point.year
dates = time.units.num2date(time.points)
for step, date in enumerate(dates):
month = date.month
year = date.year
if freq in ['mon', 'mo']:
next_month, next_year = _get_next_month(month, year)
min_bound = date2num(datetime(year, month, 1, 0, 0),
Expand All @@ -61,6 +62,7 @@ def _get_time_bounds(time, freq):
'3hr': 1.5 / 24,
'1hr': 0.5 / 24,
}
point = time.points[step]
min_bound = point - delta[freq]
max_bound = point + delta[freq]
bounds.append([min_bound, max_bound])
Expand Down Expand Up @@ -885,9 +887,10 @@ def _check_time_coord(self):
if freq.lower().endswith('pt'):
freq = freq[:-2]
if freq in ['mon', 'mo']:
dates = coord.units.num2date(coord.points)
for i in range(len(coord.points) - 1):
first = coord.cell(i).point
second = coord.cell(i + 1).point
first = dates[i]
second = dates[i + 1]
second_month = first.month + 1
second_year = first.year
if second_month == 13:
Expand All @@ -899,9 +902,10 @@ def _check_time_coord(self):
self.report_error(msg, var_name, freq)
break
elif freq == 'yr':
dates = coord.units.num2date(coord.points)
for i in range(len(coord.points) - 1):
first = coord.cell(i).point
second = coord.cell(i + 1).point
first = dates[i]
second = dates[i + 1]
second_month = first.month + 1
if first.year + 1 != second.year:
msg = '{}: Frequency {} does not match input data'
Expand Down
120 changes: 46 additions & 74 deletions esmvalcore/preprocessor/_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,36 +78,13 @@ def extract_time(cube, start_year, start_month, start_day, end_year, end_month,
ValueError
if time ranges are outside the cube time limits
"""
time_coord = cube.coord('time')
time_units = time_coord.units
if time_units.calendar == '360_day':
if start_day > 30:
start_day = 30
if end_day > 30:
end_day = 30
t_1 = PartialDateTime(year=int(start_year),
month=int(start_month),
day=int(start_day))
t_2 = PartialDateTime(year=int(end_year),
month=int(end_month),
day=int(end_day))

constraint = iris.Constraint(time=lambda t: t_1 <= t.point < t_2)

cube_slice = cube.extract(constraint)
if cube_slice is None:
raise ValueError(
f"Time slice {start_year:0>4d}-{start_month:0>2d}-{start_day:0>2d}"
f" to {end_year:0>4d}-{end_month:0>2d}-{end_day:0>2d} is outside "
f"cube time bounds {time_coord.cell(0)} to {time_coord.cell(-1)}.")

# Issue when time dimension was removed when only one point as selected.
if cube_slice.ndim != cube.ndim:
if cube_slice.coord('time') == time_coord:
logger.debug('No change needed to time.')
return cube

return cube_slice
if not cube.coords('time'):
return cube
start_datetime = datetime.datetime(
int(start_year), int(start_month), int(start_day))
end_datetime = datetime.datetime(
int(end_year), int(end_month), int(end_day))
return _extract_datetime(cube, start_datetime, end_datetime)


def _parse_start_date(date):
Expand Down Expand Up @@ -157,22 +134,11 @@ def _duration_to_date(duration, reference, sign):
return date


def _restore_time_coord_position(cube, original_time_index):
"""Restore original ordering of coordinates."""
# Coordinates before time
new_order = list(np.arange(original_time_index) + 1)

# Time coordinate
new_order.append(0)

# Coordinates after time
new_order = new_order + list(range(original_time_index + 1, cube.ndim))

# Transpose cube in-place
cube.transpose(new_order)


def _extract_datetime(cube, start_datetime, end_datetime):
def _extract_datetime(
cube: iris.cube.Cube,
start_datetime: datetime.datetime,
end_datetime: datetime.datetime,
):
"""Extract a time range from a cube.
Given a time range passed in as a datetime.datetime object, it
Expand Down Expand Up @@ -206,39 +172,42 @@ def _extract_datetime(cube, start_datetime, end_datetime):
if end_datetime.day > 30:
end_datetime = end_datetime.replace(day=30)

t_1 = PartialDateTime(year=int(start_datetime.year),
month=int(start_datetime.month),
day=int(start_datetime.day),
hour=int(start_datetime.hour),
minute=int(start_datetime.minute),
second=int(start_datetime.second))

t_2 = PartialDateTime(year=int(end_datetime.year),
month=int(end_datetime.month),
day=int(end_datetime.day),
hour=int(end_datetime.hour),
minute=int(end_datetime.minute),
second=int(end_datetime.second))

constraint = iris.Constraint(time=lambda t: t_1 <= t.point < t_2)
cube_slice = cube.extract(constraint)
t_1 = PartialDateTime(year=start_datetime.year,
month=start_datetime.month,
day=start_datetime.day,
hour=start_datetime.hour,
minute=start_datetime.minute,
second=start_datetime.second,)

t_2 = PartialDateTime(year=end_datetime.year,
month=end_datetime.month,
day=end_datetime.day,
hour=end_datetime.hour,
minute=end_datetime.minute,
second=end_datetime.second)

if not cube.coord_dims(time_coord):
constraint = iris.Constraint(time=lambda t: t_1 <= t.point < t_2)
cube_slice = cube.extract(constraint)
else:
# Convert all time points to dates at once, this is much faster
# than using a constraint.
dates = time_coord.units.num2date(time_coord.points)
select = (dates >= t_1) & (dates < t_2)
if select.any():
time_dim = cube.coord_dims(time_coord)[0]
slices = tuple(select if d == time_dim else slice(None)
for d in range(cube.ndim))
cube_slice = cube[slices]
else:
cube_slice = None

if cube_slice is None:
raise ValueError(
f"Time slice {start_datetime.strftime('%Y-%m-%d')} "
f"to {end_datetime.strftime('%Y-%m-%d')} is outside "
f"cube time bounds {time_coord.cell(0)} to {time_coord.cell(-1)}.")

# If only a single point in time is extracted, the new time coordinate of
# cube_slice is a scalar coordinate. Convert this back to a regular
# dimensional coordinate with length 1. Note that iris.util.new_axis always
# puts the new axis at index 0, so we need to reorder the coordinates in
# case the original time coordinate was not at index 0.
if cube_slice.ndim < cube.ndim:
cube_slice = iris.util.new_axis(cube_slice, 'time')
original_time_index = cube.coord_dims(time_coord)[0]
if original_time_index != 0:
_restore_time_coord_position(cube_slice, original_time_index)

return cube_slice


Expand All @@ -262,6 +231,8 @@ def clip_timerange(cube, timerange):
ValueError
Time ranges are outside the cube's time limits.
"""
if not cube.coords('time'):
return cube
start_date = timerange.split('/')[0]
start_date = _parse_start_date(start_date)

Expand Down Expand Up @@ -911,7 +882,8 @@ def regrid_time(cube, frequency):
cube with converted time axis and units.
"""
# standardize time points
time_c = [cell.point for cell in cube.coord('time').cells()]
coord = cube.coord('time')
time_c = coord.units.num2date(coord.points)
if frequency == 'yr':
time_cells = [datetime.datetime(t.year, 7, 1, 0, 0, 0) for t in time_c]
elif frequency == 'mon':
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/preprocessor/_time/test_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ def test_extract_time_non_gregorian_day(self):
),
0,
)
sliced = extract_time(cube, 1950, 2, 30, 1950, 3, 1)
assert_array_equal(np.array([59]), sliced.coord('time').points)
sliced = extract_time(cube, 1950, 1, 30, 1950, 2, 1)
assert_array_equal(np.array([29]), sliced.coord('time').points)

def test_extract_time_no_slice(self):
"""Test fail of extract_time."""
Expand Down

0 comments on commit 8041191

Please sign in to comment.