bug #201; Error in model_utils.get_s_x_extrapolate()

sky-uk · Mar 24, 2021 · 1f5c8a6
1 parent dc4ddbd
commit 1f5c8a6
Show file tree

Hide file tree

Showing 3 changed files with 70 additions and 14 deletions.
diff --git a/anticipy/model_utils.py b/anticipy/model_utils.py
@@ -128,9 +128,8 @@ def get_s_x_extrapolate(
  date_start_actuals,
  date_end_actuals,
  model=None,
- freq='W',
+ freq=None,
  extrapolate_years=2.5,
- shifted_origin=0,
  scaling_factor=100.0,
  x_start_actuals=0.):
  """
@@ -155,6 +154,8 @@ def get_s_x_extrapolate(
  :param scaling_factor: Value used for scaling a_x for certain model
  functions
  :type scaling_factor: float
+ :param x_start_actuals: numeric index for the first actuals sample
+ :type x_start_actuals: int
  :return: Series of floats with DateTimeIndex. To be used as (a_date, a_x)
  input for a model function.
  :rtype: pandas.Series
@@ -169,8 +170,17 @@ def get_s_x_extrapolate(
  date_start_actuals = pd.to_datetime(date_start_actuals)
  date_end_actuals = pd.to_datetime(date_end_actuals)
 
+ weekday_adjustment = date_start_actuals.weekday()
+ expected_freq = dict_wday_name.get(weekday_adjustment)
  if freq is None: # Default frequency
- freq = 'W'
+ freq = expected_freq
+ else:
+ if freq.startswith('W'):
+ assert expected_freq == freq, \
+ 'Error: with weekly frequency, freq ' \
+ 'parameter must match weekday of date_start_actuals:' \
+ ' {} - {} , {}' \
+ .format(freq, expected_freq, date_start_actuals)
 
  freq_short = freq[0:1] # Changes e.g. W-MON to W
  # freq_units_per_year = 52.0 if freq_short=='W' else 365.0
@@ -182,13 +192,16 @@ def get_s_x_extrapolate(
  date_end_forecast = date_end_actuals + \
  pd.DateOffset(**offset_input)
 
- index = pd.date_range(
+ i_date = pd.date_range(
  date_start_actuals,
  date_end_forecast,
  freq=freq,
  name='date')
- a_x = get_normalized_x_from_date(pd.Series(index)).values
- s_x = pd.Series(index=index, data=a_x)
+ s_date = pd.Series(i_date)
+
+ # Get days passed since date_start, then add x_start_actuals
+ s_x = (s_date - date_start_actuals).dt.days + x_start_actuals
+ s_x.index = i_date
  else:
  # Otherwise, use numeric index
  # we extrapolate future samples equal to 100*extrapolate_years
@@ -202,10 +215,9 @@ def get_s_x_extrapolate(
  index=index,
  data=np.arange(
  x_start_actuals,
- x_start_actuals + index.size)) + shifted_origin
+ x_start_actuals + index.size)) + x_start_actuals
  if model_requires_scaling(model):
  s_x = s_x / scaling_factor
-
  return s_x
 
 

diff --git a/tests/test_forecast.py b/tests/test_forecast.py
@@ -1670,7 +1670,25 @@ def test_run_forecast_naive(self):
  df_forecast = dict_result['forecast']
  logger_info('df_forecast', df_forecast)
 
- logger.info('Test 3b: weight column, season_add_mult = \'both\'')
+ logger.info('Test 3b - initial sample is 0-weight, '
+ 'extrapolate_years=0')
+ df1.weight[0:2] = 0.
+ logger_info('df1:', df1)
+
+ dict_result = run_forecast(
+ simplify_output=False, df_y=df1,
+ l_model_trend=[forecast_models.model_naive],
+ extrapolate_years=0)
+
+ df_data = dict_result['data']
+ df_metadata = dict_result['metadata']
+ df_optimize_info = dict_result['optimize_info']
+
+ logger_info('df_metadata:', df_metadata)
+ logger_info('df_optimize_info:', df_optimize_info)
+ logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60))
+
+ logger.info('Test 3c: weight column, season_add_mult = \'both\'')
 
  df1 = pd.DataFrame(
  {'y': np.arange(0, 10.),

diff --git a/tests/test_model_utils.py b/tests/test_model_utils.py
@@ -51,7 +51,7 @@ def test_apply_a_x_scaling(self):
  logger.info('f_model: %s', model)
  logger_info('a_x', a_x)
 
- def test_get_a_x_date_extrapolate(self):
+ def test_get_s_x_extrapolate(self):
  # TODO: TEST Output size, scenarios with different frequencies
  l_df_y = [
  # Single ts
@@ -74,9 +74,6 @@ def test_get_a_x_date_extrapolate(self):
  ]
  l_time_resolutions = [
  # Default config
- 'W-SUN',
- 'W',
- 'W-MON',
  'D',
  'MS',
  'YS'
@@ -128,7 +125,7 @@ def test_get_a_x_date_extrapolate(self):
 
  ts = l_df_y[0]
  model = l_models[0]
- time_resolution = l_time_resolutions[0]
+ time_resolution = None # Default - weekly frequency
  s_x = get_s_x_extrapolate(
  ts.index.min(),
  ts.index.max(),
@@ -213,6 +210,35 @@ def test_get_a_x_date_extrapolate(self):
  logger_info('t_values len', len(s_x))
  self.assertEqual(len(s_x), 10 + 3.0 * 365)
 
+ def test_get_s_x_extrapolate_gap(self):
+ # Test get_s_x_extrapolate with a gap at the start of actuals
+
+ """
+ Context - see #201
+ There is a problem caused when:
+ - Actuals data has 0-weight samples at the start
+ """
+ logger.info('Test 1 - default settings')
+ x_start_actuals = 100
+ s_x = get_s_x_extrapolate(
+ '2021-03-06', '2021-04-30',
+ extrapolate_years=1.0 / 365, # 1 day
+ x_start_actuals=x_start_actuals
+ )
+ logger_info('s_x:', s_x)
+ self.assertEqual(s_x.iloc[0], x_start_actuals)
+
+ logger.info('Test 2 - daily freq')
+ x_start_actuals = 100
+ s_x = get_s_x_extrapolate(
+ '2021-03-06', '2021-03-10',
+ extrapolate_years=1.0 / 365, # 1 day
+ x_start_actuals=x_start_actuals,
+ freq='D'
+ )
+ logger_info('s_x:', s_x)
+ self.assertEqual(s_x.iloc[0], x_start_actuals)
+
  def test_get_aic_c(self):
 
  # Known error scenario: 0 error, 1 parameters - should return -inf