feat(datacollection): Consolidate Code

ladybug-tools · Mar 5, 2019 · ceae4e2 · ceae4e2
2 parents d028f9e + 3a3843c
commit ceae4e2
Show file tree

Hide file tree

Showing 8 changed files with 275 additions and 162 deletions.
diff --git a/ladybug/_datacollectionbase.py b/ladybug/_datacollectionbase.py
@@ -580,6 +580,17 @@ def _percentile(self, values, percent, key=lambda x: x):
  d1 = key(vals[int(c)]) * (k - f)
  return d0 + d1
 
+     def _average(self, vals):
+         """Return the arithmetic mean of the input values.
+
+         Args:
+             vals: A list of numbers. An empty list raises ZeroDivisionError.
+         """
+         count = len(vals)
+         return sum(vals) / count
+
+     def _total(self, vals):
+         """Return the sum of the input values.
+
+         Args:
+             vals: A list of numbers.
+         """
+         total = sum(vals)
+         return total
+
+     def _get_percentile_function(self, percentile):
+         """Return a one-argument function that computes the given percentile.
+
+         Args:
+             percentile: The percentile handed to self._percentile on each call.
+
+         Returns:
+             A function that accepts a list of values and returns
+             self._percentile(values, percentile).
+         """
+         return lambda vals: self._percentile(vals, percentile)
+
  def __len__(self):
  return len(self._values)
 
@@ -599,6 +610,11 @@ def __iter__(self):
  def __contains__(self, item):
  return item in self._values
 
+     @property
+     def is_continuous(self):
+         """Boolean for whether the data collection is continuous.
+
+         This implementation always reports False.
+         """
+         return False
+
  @property
  def isDataCollection(self):
  """Return True."""

diff --git a/ladybug/datacollection.py b/ladybug/datacollection.py
@@ -166,33 +166,11 @@ def group_by_day(self):
 
  def average_daily(self):
  """Return a daily collection of values averaged for each day."""
- data_dict = self.group_by_day()
- avg_data, d_times = [], []
- for i in self.header.analysis_period.doys_int:
- vals = data_dict[i]
- if vals != []:
- avg_data.append(sum(vals) / len(vals))
- d_times.append(i)
- new_header = self.header.duplicate()
- new_header.metadata['operation'] = 'average'
- collection = DailyCollection(new_header, avg_data, d_times)
- collection._validated_a_period = True
- return collection
+ return self._time_interval_operation('daily', 'average')
 
  def total_daily(self):
  """Return a daily collection of values totaled over each day."""
- data_dict = self.group_by_day()
- total_data, d_times = [], []
- for i in self.header.analysis_period.doys_int:
- vals = data_dict[i]
- if vals != []:
- total_data.append(sum(vals))
- d_times.append(i)
- new_header = self.header.duplicate()
- new_header.metadata['operation'] = 'total'
- collection = DailyCollection(new_header, total_data, d_times)
- collection._validated_a_period = True
- return collection
+ return self._time_interval_operation('daily', 'total')
 
  def percentile_daily(self, percentile):
  """Return a daily collection of values at the input percentile of each day.
@@ -201,20 +179,7 @@ def percentile_daily(self, percentile):
  percentile: A float value from 0 to 100 representing the
  requested percentile.
  """
- assert 0 <= percentile <= 100, \
- 'percentile must be between 0 and 100. Got {}'.format(percentile)
- data_dict = self.group_by_day()
- per_data, d_times = [], []
- for i in self.header.analysis_period.doys_int:
- vals = data_dict[i]
- if vals != []:
- per_data.append(self._percentile(vals, percentile))
- d_times.append(i)
- new_header = self.header.duplicate()
- new_header.metadata['operation'] = '{} percentile'.format(percentile)
- collection = DailyCollection(new_header, per_data, d_times)
- collection._validated_a_period = True
- return collection
+ return self._time_interval_operation('daily', 'percentile', percentile)
 
  def group_by_month(self):
  """Return a dictionary of this collection's values grouped by each month.
@@ -230,33 +195,11 @@ def group_by_month(self):
 
  def average_monthly(self):
  """Return a monthly collection of values averaged for each month."""
- data_dict = self.group_by_month()
- avg_data, d_times = [], []
- for i in self.header.analysis_period.months_int:
- vals = data_dict[i]
- if vals != []:
- avg_data.append(sum(vals)/len(vals))
- d_times.append(i)
- new_header = self.header.duplicate()
- new_header.metadata['operation'] = 'average'
- collection = MonthlyCollection(new_header, avg_data, d_times)
- collection._validated_a_period = True
- return collection
+ return self._time_interval_operation('monthly', 'average')
 
  def total_monthly(self):
  """Return a monthly collection of values totaled over each month."""
- data_dict = self.group_by_month()
- total_data, d_times = [], []
- for i in self.header.analysis_period.months_int:
- vals = data_dict[i]
- if vals != []:
- total_data.append(sum(vals))
- d_times.append(i)
- new_header = self.header.duplicate()
- new_header.metadata['operation'] = 'total'
- collection = MonthlyCollection(new_header, total_data, d_times)
- collection._validated_a_period = True
- return collection
+ return self._time_interval_operation('monthly', 'total')
 
  def percentile_monthly(self, percentile):
  """Return a monthly collection of values at the input percentile of each month.
@@ -265,20 +208,7 @@ def percentile_monthly(self, percentile):
  percentile: A float value from 0 to 100 representing the
  requested percentile.
  """
- assert 0 <= percentile <= 100, \
- 'percentile must be between 0 and 100. Got {}'.format(percentile)
- data_dict = self.group_by_month()
- per_data, d_times = [], []
- for i in self.header.analysis_period.months_int:
- vals = data_dict[i]
- if vals != []:
- per_data.append(self._percentile(vals, percentile))
- d_times.append(i)
- new_header = self.header.duplicate()
- new_header.metadata['operation'] = '{} percentile'.format(percentile)
- collection = MonthlyCollection(new_header, per_data, d_times)
- collection._validated_a_period = True
- return collection
+ return self._time_interval_operation('monthly', 'percentile', percentile)
 
  def group_by_month_per_hour(self):
  """Return a dictionary of this collection's values grouped by each month per hour.
@@ -298,33 +228,11 @@ def group_by_month_per_hour(self):
 
  def average_monthly_per_hour(self):
  """Return a monthly per hour data collection of average values."""
- data_dict = self.group_by_month_per_hour()
- avg_data, d_times = [], []
- for i in self.header.analysis_period.months_per_hour:
- vals = data_dict[i]
- if vals != []:
- avg_data.append(sum(vals)/len(vals))
- d_times.append(i)
- new_header = self.header.duplicate()
- new_header.metadata['operation'] = 'average'
- collection = MonthlyPerHourCollection(new_header, avg_data, d_times)
- collection._validated_a_period = True
- return collection
+ return self._time_interval_operation('monthlyperhour', 'average')
 
  def total_monthly_per_hour(self):
  """Return a monthly per hour collection of totaled values."""
- data_dict = self.group_by_month_per_hour()
- total_data, d_times = [], []
- for i in self.header.analysis_period.months_per_hour:
- vals = data_dict[i]
- if vals != []:
- total_data.append(sum(vals))
- d_times.append(i)
- new_header = self.header.duplicate()
- new_header.metadata['operation'] = 'total'
- collection = MonthlyPerHourCollection(new_header, total_data, d_times)
- collection._validated_a_period = True
- return collection
+ return self._time_interval_operation('monthlyperhour', 'total')
 
  def percentile_monthly_per_hour(self, percentile):
  """Return a monthly per hour collection of values at the input percentile.
@@ -333,20 +241,7 @@ def percentile_monthly_per_hour(self, percentile):
  percentile: A float value from 0 to 100 representing the
  requested percentile.
  """
- assert 0 <= percentile <= 100, \
- 'percentile must be between 0 and 100. Got {}'.format(percentile)
- data_dict = self.group_by_month_per_hour()
- total_data, d_times = [], []
- for i in self.header.analysis_period.months_per_hour:
- vals = data_dict[i]
- if vals != []:
- total_data.append(self._percentile(vals, percentile))
- d_times.append(i)
- new_header = self.header.duplicate()
- new_header.metadata['operation'] = '{} percentile'.format(percentile)
- collection = MonthlyPerHourCollection(new_header, total_data, d_times)
- collection._validated_a_period = True
- return collection
+ return self._time_interval_operation('monthlyperhour', 'percentile', percentile)
 
  def interpolate_holes(self):
  """Linearly interpolate over holes in this collection to make it continuous.
@@ -550,6 +445,54 @@ def _check_analysis_period(self, analysis_period):
  'Collection header. {} != {}'.format(
  analysis_period.is_leap_year, self.header.analysis_period.is_leap_year)
 
+     def _time_interval_operation(self, interval, operation, percentile=0):
+         """Get a collection of a certain time interval with a given math operation.
+
+         Args:
+             interval: Text for the time interval of the output collection.
+                 Choose from 'monthly', 'daily' or 'monthlyperhour'.
+             operation: Text for the operation applied to the values within
+                 each interval. Choose from 'average', 'total' or 'percentile'.
+             percentile: A number from 0 to 100 for the requested percentile.
+                 Only used when operation is 'percentile'. Default: 0.
+
+         Returns:
+             A MonthlyCollection, DailyCollection or MonthlyPerHourCollection
+             (matching the interval) holding one value for every interval of
+             the header analysis period that contains data.
+
+         Raises:
+             ValueError: If interval or operation is not an accepted option.
+             AssertionError: If percentile is not between 0 and 100.
+         """
+         # retrieve the function and the label that describe the operation
+         if operation == 'average':
+             funct = self._average
+             operation_label = operation
+         elif operation == 'total':
+             funct = self._total
+             operation_label = operation
+         elif operation == 'percentile':
+             assert 0 <= percentile <= 100, \
+                 'percentile must be between 0 and 100. Got {}'.format(percentile)
+             funct = self._get_percentile_function(percentile)
+             operation_label = '{} percentile'.format(percentile)
+         else:
+             # an unknown operation used to be silently computed as a percentile
+             raise ValueError(
+                 'Invalid input value for operation: {}'.format(operation))
+
+         # retrieve the data and the output class that describe the time interval
+         a_per = self.header.analysis_period
+         if interval == 'monthly':
+             data_dict = self.group_by_month()
+             dates = a_per.months_int
+             collection_class = MonthlyCollection
+         elif interval == 'daily':
+             data_dict = self.group_by_day()
+             dates = a_per.doys_int
+             collection_class = DailyCollection
+         elif interval == 'monthlyperhour':
+             data_dict = self.group_by_month_per_hour()
+             dates = a_per.months_per_hour
+             collection_class = MonthlyPerHourCollection
+         else:
+             raise ValueError('Invalid input value for interval: {}'.format(interval))
+
+         # get the data for the new collection, skipping intervals without values
+         new_data, d_times = [], []
+         for i in dates:
+             vals = data_dict[i]
+             if vals:
+                 new_data.append(funct(vals))
+                 d_times.append(i)
+
+         # build the header and the final data collection
+         new_header = self.header.duplicate()
+         new_header.metadata['operation'] = operation_label
+         collection = collection_class(new_header, new_data, d_times)
+         collection._validated_a_period = True
+         return collection
+
  @property
  def isHourly(self):
  return True
@@ -967,6 +910,11 @@ def _get_analysis_period_subset(self, a_per):
  else:
  return AnalysisPeriod(*n_ap)
 
+     @property
+     def is_continuous(self):
+         """Boolean for whether the data collection is continuous.
+
+         This implementation always reports True.
+         """
+         return True
+
  @property
  def isContinuous(self):
  return True
@@ -1041,6 +989,36 @@ def filter_by_doys(self, doys):
  _filt_header = self.header.duplicate()
  return DailyCollection(_filt_header, _filt_values, _filt_datetimes)
 
+     def group_by_month(self):
+         """Return a dictionary of this collection's values grouped by each month.
+
+         Key values are between 1-12. Every month is present as a key and months
+         without any data map to an empty list.
+         """
+         data_by_month = OrderedDict((month, []) for month in xrange(1, 13))
+         for v, doy in zip(self._values, self.datetimes):
+             # NOTE(review): relies on datetimes being 1-based days of the year
+             # and on hour-of-year 0 being the first hour of January 1st. Without
+             # the -1, the last day of each month lands in the following month.
+             dt = DateTime.from_hoy((doy - 1) * 24)
+             data_by_month[dt.month].append(v)
+         return data_by_month
+
+     def average_monthly(self):
+         """Return a monthly collection holding the average value of each month."""
+         return self._monthly_operation(operation='average')
+
+     def total_monthly(self):
+         """Return a monthly collection holding the total value of each month."""
+         return self._monthly_operation(operation='total')
+
+     def percentile_monthly(self, percentile):
+         """Return a monthly collection of the values at the input percentile.
+
+         Args:
+             percentile: A float value from 0 to 100 representing the
+                 requested percentile.
+         """
+         return self._monthly_operation(operation='percentile', percentile=percentile)
+
  def validate_analysis_period(self):
  """Get a collection where the header analysis_period aligns with datetimes.
 
@@ -1111,6 +1089,46 @@ def _check_analysis_period(self, analysis_period):
  'Collection header. {} != {}'.format(
  analysis_period.is_leap_year, self.header.analysis_period.is_leap_year)
 
+     def _monthly_operation(self, operation, percentile=0):
+         """Get a MonthlyCollection given a certain operation.
+
+         Args:
+             operation: Text for the operation applied to the values of each
+                 month. Choose from 'average', 'total' or 'percentile'.
+             percentile: A number from 0 to 100 for the requested percentile.
+                 Only used when operation is 'percentile'. Default: 0.
+
+         Returns:
+             A MonthlyCollection holding one value for every month of the
+             header analysis period that contains data.
+
+         Raises:
+             ValueError: If operation is not an accepted option.
+             AssertionError: If percentile is not between 0 and 100.
+         """
+         # retrieve the function and the label that describe the operation
+         if operation == 'average':
+             funct = self._average
+             operation_label = operation
+         elif operation == 'total':
+             funct = self._total
+             operation_label = operation
+         elif operation == 'percentile':
+             assert 0 <= percentile <= 100, \
+                 'percentile must be between 0 and 100. Got {}'.format(percentile)
+             funct = self._get_percentile_function(percentile)
+             operation_label = '{} percentile'.format(percentile)
+         else:
+             # an unknown operation used to be silently computed as a percentile
+             raise ValueError(
+                 'Invalid input value for operation: {}'.format(operation))
+
+         # get the data for the new collection, skipping months without values
+         data_dict = self.group_by_month()
+         new_data, d_times = [], []
+         for i in self.header.analysis_period.months_int:
+             vals = data_dict[i]
+             if vals:
+                 new_data.append(funct(vals))
+                 d_times.append(i)
+
+         # build the new monthly collection
+         new_header = self.header.duplicate()
+         new_header.metadata['operation'] = operation_label
+         collection = MonthlyCollection(new_header, new_data, d_times)
+         collection._validated_a_period = True
+         return collection
+
+     @property
+     def is_continuous(self):
+         """Boolean for whether the data collection is continuous.
+
+         True only when the analysis period has been validated and there is one
+         value for each day of the year in the header analysis period.
+         """
+         expected_count = len(self.header.analysis_period.doys_int)
+         return self._validated_a_period is True and \
+             len(self.values) == expected_count
+
  @property
  def isDaily(self):
  return True
@@ -1233,6 +1251,15 @@ def validate_analysis_period(self):
  new_coll._validated_a_period = True
  return new_coll
 
+     @property
+     def is_continuous(self):
+         """Boolean for whether the data collection is continuous.
+
+         True only when the analysis period has been validated and there is one
+         value for each month in the header analysis period.
+         """
+         expected_count = len(self.header.analysis_period.months_int)
+         return self._validated_a_period is True and \
+             len(self.values) == expected_count
+
  @property
  def isMonthly(self):
  return True
@@ -1369,6 +1396,16 @@ def validate_analysis_period(self):
  new_coll._validated_a_period = True
  return new_coll
 
+     @property
+     def is_continuous(self):
+         """Boolean for whether the data collection is continuous.
+
+         True only when the analysis period has been validated, it spans hours
+         0 through 23, and there is one value for each month-per-hour entry of
+         the header analysis period.
+         """
+         a_per = self.header.analysis_period
+         covers_full_day = a_per.st_hour == 0 and a_per.end_hour == 23
+         return self._validated_a_period is True and covers_full_day and \
+             len(self.values) == len(a_per.months_per_hour)
+
  @property
  def isMonthlyPerHour(self):
  return True