Skip to content

Commit

Permalink
bug #201; Error in model_utils.get_s_x_extrapolate()
Browse files Browse the repository at this point in the history
  • Loading branch information
capelastegui committed Mar 24, 2021
1 parent dc4ddbd commit 1f5c8a6
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 14 deletions.
28 changes: 20 additions & 8 deletions anticipy/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,8 @@ def get_s_x_extrapolate(
date_start_actuals,
date_end_actuals,
model=None,
freq='W',
freq=None,
extrapolate_years=2.5,
shifted_origin=0,
scaling_factor=100.0,
x_start_actuals=0.):
"""
Expand All @@ -155,6 +154,8 @@ def get_s_x_extrapolate(
:param scaling_factor: Value used for scaling a_x for certain model
functions
:type scaling_factor: float
:param x_start_actuals: numeric index for the first actuals sample
:type x_start_actuals: int
:return: Series of floats with DateTimeIndex. To be used as (a_date, a_x)
input for a model function.
:rtype: pandas.Series
Expand All @@ -169,8 +170,17 @@ def get_s_x_extrapolate(
date_start_actuals = pd.to_datetime(date_start_actuals)
date_end_actuals = pd.to_datetime(date_end_actuals)

weekday_adjustment = date_start_actuals.weekday()
expected_freq = dict_wday_name.get(weekday_adjustment)
if freq is None: # Default frequency
freq = 'W'
freq = expected_freq
else:
if freq.startswith('W'):
assert expected_freq == freq, \
'Error: with weekly frequency, freq ' \
'parameter must match weekday of date_start_actuals:' \
' {} - {} , {}' \
.format(freq, expected_freq, date_start_actuals)

freq_short = freq[0:1] # Changes e.g. W-MON to W
# freq_units_per_year = 52.0 if freq_short=='W' else 365.0
Expand All @@ -182,13 +192,16 @@ def get_s_x_extrapolate(
date_end_forecast = date_end_actuals + \
pd.DateOffset(**offset_input)

index = pd.date_range(
i_date = pd.date_range(
date_start_actuals,
date_end_forecast,
freq=freq,
name='date')
a_x = get_normalized_x_from_date(pd.Series(index)).values
s_x = pd.Series(index=index, data=a_x)
s_date = pd.Series(i_date)

# Get days passed since date_start, then add x_start_actuals
s_x = (s_date - date_start_actuals).dt.days + x_start_actuals
s_x.index = i_date
else:
# Otherwise, use numeric index
# we extrapolate future samples equal to 100*extrapolate_years
Expand All @@ -202,10 +215,9 @@ def get_s_x_extrapolate(
index=index,
data=np.arange(
x_start_actuals,
x_start_actuals + index.size)) + shifted_origin
x_start_actuals + index.size)) + x_start_actuals
if model_requires_scaling(model):
s_x = s_x / scaling_factor

return s_x


Expand Down
20 changes: 19 additions & 1 deletion tests/test_forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1670,7 +1670,25 @@ def test_run_forecast_naive(self):
df_forecast = dict_result['forecast']
logger_info('df_forecast', df_forecast)

logger.info('Test 3b: weight column, season_add_mult = \'both\'')
logger.info('Test 3b - initial sample is 0-weight, '
'extrapolate_years=0')
df1.weight[0:2] = 0.
logger_info('df1:', df1)

dict_result = run_forecast(
simplify_output=False, df_y=df1,
l_model_trend=[forecast_models.model_naive],
extrapolate_years=0)

df_data = dict_result['data']
df_metadata = dict_result['metadata']
df_optimize_info = dict_result['optimize_info']

logger_info('df_metadata:', df_metadata)
logger_info('df_optimize_info:', df_optimize_info)
logger_info('df_data:', df_data.groupby(['source', 'model']).tail(60))

logger.info('Test 3c: weight column, season_add_mult = \'both\'')

df1 = pd.DataFrame(
{'y': np.arange(0, 10.),
Expand Down
36 changes: 31 additions & 5 deletions tests/test_model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def test_apply_a_x_scaling(self):
logger.info('f_model: %s', model)
logger_info('a_x', a_x)

def test_get_a_x_date_extrapolate(self):
def test_get_s_x_extrapolate(self):
# TODO: TEST Output size, scenarios with different frequencies
l_df_y = [
# Single ts
Expand All @@ -74,9 +74,6 @@ def test_get_a_x_date_extrapolate(self):
]
l_time_resolutions = [
# Default config
'W-SUN',
'W',
'W-MON',
'D',
'MS',
'YS'
Expand Down Expand Up @@ -128,7 +125,7 @@ def test_get_a_x_date_extrapolate(self):

ts = l_df_y[0]
model = l_models[0]
time_resolution = l_time_resolutions[0]
time_resolution = None # Default - weekly frequency
s_x = get_s_x_extrapolate(
ts.index.min(),
ts.index.max(),
Expand Down Expand Up @@ -213,6 +210,35 @@ def test_get_a_x_date_extrapolate(self):
logger_info('t_values len', len(s_x))
self.assertEqual(len(s_x), 10 + 3.0 * 365)

def test_get_s_x_extrapolate_gap(self):
    """
    Test get_s_x_extrapolate with a gap at the start of actuals.

    Context - see #201
    There is a problem caused when:
    - Actuals data has 0-weight samples at the start

    Each scenario passes a non-zero x_start_actuals and checks that the
    first value of the returned series equals it.
    """
    x_start_actuals = 100
    # (log label, first actuals date, last actuals date, extra kwargs)
    l_scenarios = [
        ('Test 1 - default settings', '2021-03-06', '2021-04-30', {}),
        ('Test 2 - daily freq', '2021-03-06', '2021-03-10',
         {'freq': 'D'}),
    ]
    for label, date_start, date_end, kwargs in l_scenarios:
        logger.info(label)
        s_x = get_s_x_extrapolate(
            date_start,
            date_end,
            extrapolate_years=1.0 / 365,  # 1 day
            x_start_actuals=x_start_actuals,
            **kwargs
        )
        logger_info('s_x:', s_x)
        self.assertEqual(s_x.iloc[0], x_start_actuals)

def test_get_aic_c(self):

# Known error scenario: 0 error, 1 parameters - should return -inf
Expand Down

0 comments on commit 1f5c8a6

Please sign in to comment.