From 0457a4c78e83b1ee4f8235694ca80a6d40293d90 Mon Sep 17 00:00:00 2001 From: Filotas Theodosiou <70523417+FilTheo@users.noreply.github.com> Date: Fri, 24 Jan 2025 17:47:32 +0200 Subject: [PATCH 1/3] full adam example (beta) --- python/smooth/adam_general/test_adam.ipynb | 434 +++++++++++++++++++++ 1 file changed, 434 insertions(+) create mode 100644 python/smooth/adam_general/test_adam.ipynb diff --git a/python/smooth/adam_general/test_adam.ipynb b/python/smooth/adam_general/test_adam.ipynb new file mode 100644 index 00000000..a49c1b00 --- /dev/null +++ b/python/smooth/adam_general/test_adam.ipynb @@ -0,0 +1,434 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from core.adam import Adam\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from core.checker import parameters_checker\n", + "from typing import List, Union, Dict, Any\n", + "from smooth.adam_general._adam_general import adam_fitter, adam_forecaster\n", + "from core.utils.utils import measurement_inverter, scaler, calculate_likelihood, calculate_entropy, calculate_multistep_loss\n", + "from numpy.linalg import eigvals\n", + "import nlopt\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from core.estimator import estimator, selector\n", + "from core.creator import creator, initialiser, architector, filler\n", + "from core.utils.ic import ic_function\n", + "\n", + "from smooth.adam_general._adam_general import adam_fitter, adam_forecaster\n", + "\n", + "import warnings" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate random monthly time series data\n", + "np.random.seed(41) # For reproducibility\n", + "n_points = 24 # 2 years of monthly data\n", + "time_series = np.random.randint(1, 100, size=n_points).cumsum() # Random walk with strictly positive integers\n", + "dates = pd.date_range(start='2023-01-01', periods=n_points, freq='M') # Monthly frequency\n", + "ts_df = pd.DataFrame({'value': time_series}, index=dates)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The rpy2.ipython extension is already loaded. To reload it, use:\n", + " %reload_ext rpy2.ipython\n" + ] + } + ], + "source": [ + "%load_ext rpy2.ipython" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " alpha level \n", + " 1.00000 64.99598 \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/rpy2/robjects/pandas2ri.py:56: FutureWarning: iteritems is deprecated and will be removed in a future version. 
Use .items instead.\n",
+      "  for name, values in obj.iteritems():\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%R -i ts_df\n",
+    "library(smooth)\n",
+    "\n",
+    "model <- adam(ts_df, model = \"ANN\", lags = c(12))\n",
+    "\n",
+    "forecast(model, h = 12)\n",
+    "model$B"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model = \"ANN\"\n",
+    "lags = [12]\n",
+    "\n",
+    "# Optimiser settings. Note: no trailing commas here, since e.g. `lb = None,`\n",
+    "# would bind the one-element tuple (None,) instead of None.\n",
+    "multisteps = False\n",
+    "lb = None\n",
+    "ub = None\n",
+    "maxtime = None\n",
+    "print_level = 1  # 1 or 0\n",
+    "maxeval = None\n",
+    "h = 12\n",
+    "\n",
+    "\n",
+    "# Assume that the model is not provided;\n",
+    "# these are the default arguments\n",
+    "profiles_recent_provided = False\n",
+    "profiles_recent_table = None\n"
+   ]
+  },
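+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Illustrative sketch, not part of the original run: the standard AIC/AICc\n",
+    "# arithmetic applied to the log-likelihood that the estimator reports further\n",
+    "# down in this notebook (value = -1611.95..., nobs = 24, df = 3). The package's\n",
+    "# own helper for this is ic_function from core.utils.ic; the hard-coded values\n",
+    "# below are only assumptions taken from the output shown later.\n",
+    "log_lik, nobs, df = -1611.9508210967872, 24, 3\n",
+    "aic = 2 * df - 2 * log_lik\n",
+    "aicc = aic + (2 * df * (df + 1)) / (nobs - df - 1)\n",
+    "print(aic, aicc)\n"
+   ]
+  },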
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>HTML repr of ts_df omitted: same values as the text/plain output below</div>
" + ], + "text/plain": [ + " value\n", + "2023-01-31 65\n", + "2023-02-28 101\n", + "2023-03-31 114\n", + "2023-04-30 213\n", + "2023-05-31 294\n", + "2023-06-30 360\n", + "2023-07-31 450\n", + "2023-08-31 474\n", + "2023-09-30 560\n", + "2023-10-31 587\n", + "2023-11-30 644\n", + "2023-12-31 648\n", + "2024-01-31 684\n", + "2024-02-29 735\n", + "2024-03-31 806\n", + "2024-04-30 891\n", + "2024-05-31 978\n", + "2024-06-30 1040\n", + "2024-07-31 1092\n", + "2024-08-31 1185\n", + "2024-09-30 1274\n", + "2024-10-31 1350\n", + "2024-11-30 1443\n", + "2024-12-31 1496" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts_df" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Initial value is not selected. Switching to optimal.\n", + "Initial parameters: [1.000e-01 1.574e+02]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3462: FutureWarning: In a future version, DataFrame.mean(axis=None) will return a scalar mean over the entire DataFrame. To retain the old behavior, use 'frame.mean(axis=0)' or just 'frame.mean()'\n", + " return mean(axis=axis, dtype=dtype, out=out, **kwargs)\n", + "/home/filtheo/smooth/python/smooth/adam_general/core/utils/utils.py:329: RuntimeWarning: overflow encountered in square\n", + " return np.sqrt(np.sum(errors**2) / obs_in_sample)\n", + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:86: RuntimeWarning: overflow encountered in reduce\n", + " return ufunc.reduce(obj, axis, dtype, out, **passkwargs)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3462: FutureWarning: In a future version, DataFrame.mean(axis=None) will return a scalar mean over the entire DataFrame. 
To retain the old behavior, use 'frame.mean(axis=0)' or just 'frame.mean()'\n", + " return mean(axis=axis, dtype=dtype, out=out, **kwargs)\n" + ] + }, + { + "data": { + "text/plain": [ + "2025-01-31 13.475472\n", + "2025-02-28 13.475472\n", + "2025-03-31 13.475472\n", + "2025-04-30 13.475472\n", + "2025-05-31 13.475472\n", + "2025-06-30 13.475472\n", + "2025-07-31 13.475472\n", + "2025-08-31 13.475472\n", + "2025-09-30 13.475472\n", + "2025-10-31 13.475472\n", + "2025-11-30 13.475472\n", + "2025-12-31 13.475472\n", + "Freq: M, dtype: float64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adam = Adam(model, lags)\n", + "adam.fit(ts_df, h = h)\n", + "fc = adam.predict()\n", + "fc['forecast']" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'B': array([1.000e-01, 1.574e+02]),\n", + " 'CF_value': 1611.9508210967872,\n", + " 'n_param_estimated': 2,\n", + " 'log_lik_adam_value': {'value': -1611.9508210967872, 'nobs': 24, 'df': 3},\n", + " 'arima_polynomials': None}" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adam.adam_estimated" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[True]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fc['initial_estimated']" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 8cdc65e6c2cc47eaaadcefde4973509e7c0608c8 Mon Sep 17 00:00:00 2001 From: Filotas Theodosiou <70523417+FilTheo@users.noreply.github.com> Date: Fri, 24 Jan 2025 17:48:09 +0200 Subject: [PATCH 2/3] step-by-step functions example --- python/smooth/adam_general/test_3.ipynb | 3563 +++++++++++++++++++++++ 1 file changed, 3563 insertions(+) create mode 100644 python/smooth/adam_general/test_3.ipynb diff --git a/python/smooth/adam_general/test_3.ipynb b/python/smooth/adam_general/test_3.ipynb new file mode 100644 index 00000000..4770ad00 --- /dev/null +++ b/python/smooth/adam_general/test_3.ipynb @@ -0,0 +1,3563 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from core.checker import parameters_checker\n", + "from typing import List, Union, Dict, Any\n", + "from smooth.adam_general._adam_general import adam_fitter, adam_forecaster\n", + "from core.utils.utils import measurement_inverter, scaler, calculate_likelihood, calculate_entropy, calculate_multistep_loss\n", + "from numpy.linalg import eigvals\n", + "import nlopt\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from core.estimator import estimator, selector, preparator\n", + "from core.creator import creator, initialiser, architector, filler\n", + "from core.utils.ic import ic_function\n", + "\n", + "from smooth.adam_general._adam_general import adam_fitter, adam_forecaster\n", + "\n", + "import warnings" + ] + }, + { + 
"cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/rpy2/robjects/pandas2ri.py:14: FutureWarning: pandas.core.index is deprecated and will be removed in a future version. The public classes are available in the top-level namespace.\n", + " from pandas.core.index import Index as PandasIndex\n", + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/rpy2/robjects/pandas2ri.py:34: UserWarning: pandas >= 1.0 is not supported.\n", + " warnings.warn('pandas >= 1.0 is not supported.')\n" + ] + } + ], + "source": [ + "%load_ext rpy2.ipython" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Generate random monthly time series data\n", + "np.random.seed(42) # For reproducibility\n", + "n_points = 24 # 2 years of monthly data\n", + "time_series = np.random.randint(1, 100, size=n_points).cumsum() # Random walk with strictly positive integers\n", + "dates = pd.date_range(start='2023-01-01', periods=n_points, freq='M') # Monthly frequency\n", + "ts_df = pd.DataFrame({'value': time_series}, index=dates)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " alpha level \n", + " 1.00000 51.99987 " + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/rpy2/robjects/pandas2ri.py:56: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.\n", + " for name, values in obj.iteritems():\n" + ] + } + ], + "source": [ + "%%R -i ts_df\n", + "library(smooth)\n", + "\n", + "model <- adam(ts_df, model = \"ANN\", lags = c(12))\n", + "\n", + "forecast(model, h = 12)\n", + "model$B" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/rpy2/robjects/pandas2ri.py:56: FutureWarning: iteritems is deprecated and will be removed in a future version. 
Use .items instead.\n", + " for name, values in obj.iteritems():\n", + "R[write to console]: Loading required package: greybox\n", + "\n", + "R[write to console]: Package \"greybox\", v1.0.5 loaded.\n", + "\n", + "\n", + "R[write to console]: This is package \"smooth\", v3.1.6\n", + "\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ets_model: TRUE \n", + "e_type: A \n", + "t_type: N \n", + "s_type: N \n", + "model_is_trendy: FALSE \n", + "model_is_seasonal: FALSE \n", + "lags: 1 \n", + "lags_model: 1 \n", + "lags_model_arima: 0 \n", + "lags_model_all: 1 \n", + "lags_model_max: 1 \n", + "profiles_recent_table: \n", + "profiles_recent_provided: FALSE \n", + "obs_states: 25 \n", + "obs_in_sample: 24 \n", + "obs_all: 24 \n", + "components_number_ets: 1 \n", + "components_number_ets_seasonal: 0 \n", + "components_names_ets: level \n", + "ot_logical: TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE \n", + "y_in_sample: 52, 145, 160, 232, 293, 314, 397, 484, 559, 634, 722, 746, 749, 771, 824, 826, 914, 944, 982, 984, 1048, 1108, 1129, 1162 \n", + "persistence: \n", + "persistence_estimate: TRUE \n", + "persistence_level: \n", + "persistence_level_estimate: TRUE \n", + "persistence_trend: \n", + "persistence_trend_estimate: FALSE \n", + "persistence_seasonal: \n", + "persistence_seasonal_estimate: \n", + "persistence_xreg: \n", + "persistence_xreg_estimate: FALSE \n", + "persistence_xreg_provided: FALSE \n", + "phi: 1 \n", + "initial_type: optimal \n", + "initial_estimate: TRUE \n", + "initial_level: \n", + "initial_level_estimate: TRUE \n", + "initial_trend: \n", + "initial_trend_estimate: FALSE \n", + "initial_seasonal: \n", + "initial_seasonal_estimate: FALSE \n", + "initial_arima: \n", + "initial_arima_estimate: FALSE \n", + "initial_arima_number: 0 \n", + "initial_xreg_estimate: FALSE \n", + "initial_xreg_provided: FALSE \n", + "arima_model: FALSE \n", + "ar_required: FALSE \n", + "i_required: FALSE \n", + "ma_required: FALSE \n", + "arma_parameters: \n", + "ar_orders: \n", + "i_orders: \n", + "ma_orders: \n", + "components_number_arima: 0 \n", + "components_names_arima: \n", + "xreg_model: FALSE \n", + "xreg_model_initials: \n", + "xreg_data: \n", + "xreg_number: 0 \n", + "xreg_names: \n", + "xreg_parameters_persistence: 0 \n", + "constant_required: FALSE \n", + "constant_estimate: FALSE \n", + "constant_value: \n", + "constant_name: \n" + ] + } + ], + "source": [ + "%%R -i ts_df\n", + "library(smooth)\n", + "# Initialize variables matching R adam() function defaults\n", + "data <- ts_df\n", + "lags <- c(12)\n", + "model <- \"ANN\"\n", + "orders <- list(ar=c(0), i=c(0), ma=c(0), select=FALSE)\n", + "constant <- FALSE\n", + "formula <- NULL \n", + "regressors <- 'use'\n", + "occurrence <- \"none\"\n", + "distribution <- \"default\"\n", + "loss <- \"likelihood\"\n", + "outliers <- \"ignore\"\n", + "level <- 0.99\n", + "h <- 0\n", + "holdout <- FALSE\n", + "persistence <- NULL\n", + "phi <- NULL\n", + "initial <- \"optimal\"\n", + "arma <- NULL\n", + "ic <- \"AICc\"\n", + "bounds <- \"usual\"\n", + "silent <- TRUE\n", + "# paste0() is needed in order to get rid of potential issues with names\n", + "yName <- paste0(deparse(substitute(data)),collapse=\"\");\n", + "\n", + "modelDo <- \"\"\n", + "ellipsis <- list();\n", + "# Assume that the model is not provided\n", + "profilesRecentProvided <- FALSE;\n", + "profilesRecentTable <- NULL;\n", + "\n", + "# paste0() is 
needed in order to get rid of potential issues with names\n", + "yName <- paste0(deparse(substitute(data)),collapse=\"\");\n", + "checkerReturn <- smooth:::parametersChecker(data, model, lags, formula, orders, constant, arma,\n", + " outliers, level,\n", + " persistence, phi, initial,\n", + " distribution, loss, h, holdout, occurrence, ic, bounds,\n", + " regressors, yName,\n", + " silent, modelDo, ParentEnvironment=environment(), ellipsis, fast=FALSE);\n", + "\n", + "cat(\"ets_model:\", etsModel, \"\\n\")\n", + "cat(\"e_type:\", Etype, \"\\n\")\n", + "cat(\"t_type:\", Ttype, \"\\n\")\n", + "cat(\"s_type:\", Stype, \"\\n\")\n", + "cat(\"model_is_trendy:\", modelIsTrendy, \"\\n\")\n", + "cat(\"model_is_seasonal:\", modelIsSeasonal, \"\\n\")\n", + "cat(\"lags:\", paste(lags, collapse=\", \"), \"\\n\")\n", + "cat(\"lags_model:\", paste(lagsModel, collapse=\", \"), \"\\n\")\n", + "cat(\"lags_model_arima:\", paste(lagsModelARIMA, collapse=\", \"), \"\\n\")\n", + "cat(\"lags_model_all:\", paste(lagsModelAll, collapse=\", \"), \"\\n\")\n", + "cat(\"lags_model_max:\", lagsModelMax, \"\\n\")\n", + "cat(\"profiles_recent_table:\", paste(profilesRecentTable, collapse=\", \"), \"\\n\")\n", + "cat(\"profiles_recent_provided:\", profilesRecentProvided, \"\\n\")\n", + "cat(\"obs_states:\", obsStates, \"\\n\")\n", + "cat(\"obs_in_sample:\", obsInSample, \"\\n\")\n", + "cat(\"obs_all:\", obsAll, \"\\n\")\n", + "cat(\"components_number_ets:\", componentsNumberETS, \"\\n\")\n", + "cat(\"components_number_ets_seasonal:\", componentsNumberETSSeasonal, \"\\n\")\n", + "cat(\"components_names_ets:\", paste(componentsNamesETS, collapse=\", \"), \"\\n\")\n", + "cat(\"ot_logical:\", paste(otLogical, collapse=\", \"), \"\\n\")\n", + "cat(\"y_in_sample:\", paste(yInSample, collapse=\", \"), \"\\n\")\n", + "cat(\"persistence:\", paste(persistence, collapse=\", \"), \"\\n\")\n", + "cat(\"persistence_estimate:\", persistenceEstimate, \"\\n\")\n", + "cat(\"persistence_level:\", persistenceLevel, \"\\n\")\n", + "cat(\"persistence_level_estimate:\", persistenceLevelEstimate, \"\\n\")\n", + "cat(\"persistence_trend:\", persistenceTrend, \"\\n\")\n", + "cat(\"persistence_trend_estimate:\", persistenceTrendEstimate, \"\\n\")\n", + "cat(\"persistence_seasonal:\", persistenceSeasonal, \"\\n\")\n", + "cat(\"persistence_seasonal_estimate:\", persistenceSeasonalEstimate, \"\\n\")\n", + "cat(\"persistence_xreg:\", paste(persistenceXreg, collapse=\", \"), \"\\n\")\n", + "cat(\"persistence_xreg_estimate:\", persistenceXregEstimate, \"\\n\")\n", + "cat(\"persistence_xreg_provided:\", persistenceXregProvided, \"\\n\")\n", + "cat(\"phi:\", phi, \"\\n\")\n", + "cat(\"initial_type:\", initialType, \"\\n\")\n", + "cat(\"initial_estimate:\", initialEstimate, \"\\n\")\n", + "cat(\"initial_level:\", initialLevel, \"\\n\")\n", + "cat(\"initial_level_estimate:\", initialLevelEstimate, \"\\n\")\n", + "cat(\"initial_trend:\", initialTrend, \"\\n\")\n", + "cat(\"initial_trend_estimate:\", initialTrendEstimate, \"\\n\")\n", + "cat(\"initial_seasonal:\", paste(initialSeasonal, collapse=\", \"), \"\\n\")\n", + "cat(\"initial_seasonal_estimate:\", initialSeasonalEstimate, \"\\n\")\n", + "cat(\"initial_arima:\", paste(initialArima, collapse=\", \"), \"\\n\")\n", + "cat(\"initial_arima_estimate:\", initialArimaEstimate, \"\\n\")\n", + "cat(\"initial_arima_number:\", initialArimaNumber, \"\\n\")\n", + "cat(\"initial_xreg_estimate:\", initialXregEstimate, \"\\n\")\n", + "cat(\"initial_xreg_provided:\", initialXregProvided, \"\\n\")\n", + 
"cat(\"arima_model:\", arimaModel, \"\\n\")\n", + "cat(\"ar_required:\", arRequired, \"\\n\")\n", + "cat(\"i_required:\", iRequired, \"\\n\")\n", + "cat(\"ma_required:\", maRequired, \"\\n\")\n", + "cat(\"arma_parameters:\", paste(armaParameters, collapse=\", \"), \"\\n\")\n", + "cat(\"ar_orders:\", paste(arOrders, collapse=\", \"), \"\\n\")\n", + "cat(\"i_orders:\", paste(iOrders, collapse=\", \"), \"\\n\")\n", + "cat(\"ma_orders:\", paste(maOrders, collapse=\", \"), \"\\n\")\n", + "cat(\"components_number_arima:\", componentsNumberARIMA, \"\\n\")\n", + "cat(\"components_names_arima:\", paste(componentsNamesARIMA, collapse=\", \"), \"\\n\")\n", + "cat(\"xreg_model:\", xregModel, \"\\n\")\n", + "cat(\"xreg_model_initials:\", paste(xregModelInitials, collapse=\", \"), \"\\n\")\n", + "cat(\"xreg_data:\", paste(xregData, collapse=\", \"), \"\\n\")\n", + "cat(\"xreg_number:\", xregNumber, \"\\n\")\n", + "cat(\"xreg_names:\", paste(xregNames, collapse=\", \"), \"\\n\")\n", + "cat(\"xreg_parameters_persistence:\", paste(xregParametersPersistence, collapse=\", \"), \"\\n\")\n", + "cat(\"constant_required:\", constantRequired, \"\\n\")\n", + "cat(\"constant_estimate:\", constantEstimate, \"\\n\")\n", + "cat(\"constant_value:\", constantValue, \"\\n\")\n", + "cat(\"constant_name:\", constantName, \"\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "%%R\n", + "\n", + "if(modelDo==\"estimate\"){\n", + " # If this is LASSO/RIDGE with lambda=1, use MSE to estimate initials only\n", + " lambdaOriginal <- lambda;\n", + " if(any(loss==c(\"LASSO\",\"RIDGE\")) && lambda==1){\n", + " if(etsModel){\n", + " # Pre-set ETS parameters\n", + " persistenceEstimate[] <- FALSE;\n", + " persistenceLevelEstimate[] <- persistenceTrendEstimate[] <-\n", + " persistenceSeasonalEstimate[] <- FALSE;\n", + " persistenceLevel <- persistenceTrend <- persistenceSeasonal <- 0;\n", + " # Phi\n", + " phiEstimate[] <- FALSE;\n", + " phi <- 1;\n", + " }\n", + " if(xregModel){\n", + " # ETSX parameters\n", + " persistenceXregEstimate[] <- FALSE;\n", + " persistenceXreg <- 0;\n", + " }\n", + " if(arimaModel){\n", + " # Pre-set ARMA parameters\n", + " arEstimate[] <- FALSE;\n", + " maEstimate[] <- FALSE;\n", + " armaParameters <- vector(\"numeric\",sum(arOrders)+sum(maOrders));\n", + " j <- 0;\n", + " for(i in 1:length(lags)){\n", + " if(arOrders[i]>0){\n", + " armaParameters[j+1:arOrders[i]] <- 1;\n", + " names(armaParameters)[j+c(1:arOrders[i])] <- paste0(\"phi\",1:arOrders[i],\"[\",lags[i],\"]\");\n", + " j <- j + arOrders[i];\n", + " }\n", + " if(maOrders[i]>0){\n", + " armaParameters[j+1:maOrders[i]] <- 0;\n", + " names(armaParameters)[j+c(1:maOrders[i])] <- paste0(\"theta\",1:maOrders[i],\"[\",lags[i],\"]\");\n", + " j <- j + maOrders[i];\n", + " }\n", + " }\n", + " }\n", + " lambda <- 0;\n", + " }\n", + "\n", + "\n", + "\n", + "}\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "$lagsModel\n", + " [,1]\n", + "[1,] 1\n", + "\n", + "$lagsModelAll\n", + " [,1]\n", + "[1,] 1\n", + "\n", + "$lagsModelMax\n", + "[1] 1\n", + "\n", + "$componentsNumberETS\n", + "[1] 1\n", + "\n", + "$componentsNumberETSSeasonal\n", + "[1] 0\n", + "\n", + "$componentsNumberETSNonSeasonal\n", + "[1] 1\n", + "\n", + "$componentsNamesETS\n", + "[1] \"level\"\n", + "\n", + "$obsStates\n", + "[1] 25\n", + "\n", + "$modelIsTrendy\n", + "[1] FALSE\n", + "\n", + 
"$modelIsSeasonal\n", + "[1] FALSE\n", + "\n", + "$indexLookupTable\n", + " [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]\n", + "1 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", + " [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]\n", + "1 0 0 0 0 0 0 0 0 0 0 0\n", + "\n", + "$profilesRecentTable\n", + " [,1]\n", + "1 1\n", + "\n" + ] + } + ], + "source": [ + "%%R \n", + "\n", + "architector <- function(etsModel, Etype, Ttype, Stype, lags, lagsModelSeasonal,\n", + " xregNumber, obsInSample, initialType,\n", + " arimaModel, lagsModelARIMA, xregModel, constantRequired,\n", + " profilesRecentTable=NULL, profilesRecentProvided=FALSE){\n", + " # If there is ETS\n", + " if(etsModel){\n", + " modelIsTrendy <- Ttype!=\"N\";\n", + " if(modelIsTrendy){\n", + " # Make lags (1, 1)\n", + " lagsModel <- matrix(c(1,1),ncol=1);\n", + " componentsNamesETS <- c(\"level\",\"trend\");\n", + " }\n", + " else{\n", + " # Make lags (1, ...)\n", + " lagsModel <- matrix(c(1),ncol=1);\n", + " componentsNamesETS <- c(\"level\");\n", + " }\n", + " modelIsSeasonal <- Stype!=\"N\";\n", + " if(modelIsSeasonal){\n", + " # If the lags are for the non-seasonal model\n", + " lagsModel <- matrix(c(lagsModel,lagsModelSeasonal),ncol=1);\n", + " componentsNumberETSSeasonal <- length(lagsModelSeasonal);\n", + " if(componentsNumberETSSeasonal>1){\n", + " componentsNamesETS <- c(componentsNamesETS,paste0(\"seasonal\",c(1:componentsNumberETSSeasonal)));\n", + " }\n", + " else{\n", + " componentsNamesETS <- c(componentsNamesETS,\"seasonal\");\n", + " }\n", + " }\n", + " else{\n", + " componentsNumberETSSeasonal <- 0;\n", + " }\n", + " lagsModelAll <- lagsModel;\n", + "\n", + " componentsNumberETS <- length(lagsModel);\n", + " }\n", + " else{\n", + " modelIsTrendy <- modelIsSeasonal <- FALSE;\n", + " componentsNumberETS <- componentsNumberETSSeasonal <- 0;\n", + " componentsNamesETS <- NULL;\n", + " lagsModelAll <- lagsModel <- NULL;\n", + " }\n", + "\n", + " # If there is ARIMA\n", + " if(arimaModel){\n", + " lagsModelAll <- matrix(c(lagsModel,lagsModelARIMA), ncol=1);\n", + " }\n", + "\n", + " # If constant is needed, add it\n", + " if(constantRequired){\n", + " lagsModelAll <- matrix(c(lagsModelAll,1), ncol=1);\n", + " }\n", + "\n", + " # If there are xreg\n", + " if(xregModel){\n", + " lagsModelAll <- matrix(c(lagsModelAll,rep(1,xregNumber)), ncol=1);\n", + " }\n", + "\n", + " lagsModelMax <- max(lagsModelAll);\n", + "\n", + " # Define the number of cols that should be in the matvt\n", + " obsStates <- obsInSample + lagsModelMax;\n", + "\n", + " # Create ADAM profiles for correct treatment of seasonality\n", + " adamProfiles <- adamProfileCreator(lagsModelAll, lagsModelMax, obsAll,\n", + " lags=lags, yIndex=yIndexAll, yClasses=yClasses);\n", + " if(profilesRecentProvided){\n", + " profilesRecentTable <- profilesRecentTable[,1:lagsModelMax,drop=FALSE];\n", + " }\n", + " else{\n", + " profilesRecentTable <- adamProfiles$recent;\n", + " }\n", + " indexLookupTable <- adamProfiles$lookup;\n", + "\n", + " return(list(lagsModel=lagsModel,lagsModelAll=lagsModelAll, lagsModelMax=lagsModelMax,\n", + " componentsNumberETS=componentsNumberETS, componentsNumberETSSeasonal=componentsNumberETSSeasonal,\n", + " componentsNumberETSNonSeasonal=componentsNumberETS-componentsNumberETSSeasonal,\n", + " componentsNamesETS=componentsNamesETS, obsStates=obsStates, modelIsTrendy=modelIsTrendy,\n", + " modelIsSeasonal=modelIsSeasonal,\n", + " indexLookupTable=indexLookupTable, profilesRecentTable=profilesRecentTable));\n", + 
" }\n", + " \n", + " adamProfileCreator <- function(lagsModelAll, lagsModelMax, obsAll,\n", + " lags=NULL, yIndex=NULL, yClasses=NULL){\n", + " # lagsModelAll - all lags used in the model for ETS + ARIMA + xreg\n", + " # lagsModelMax - the maximum lag used in the model\n", + " # obsAll - number of observations to create\n", + " # lags - the original lags provided by user (no lags for ARIMA etc). Needed in order to see\n", + " # if weird frequencies are used.\n", + " # yIndex - the indices needed in order to get the weird dates.\n", + " # yClass - the class used for the actuals. If zoo, magic will happen here.\n", + " # Create the matrix with profiles, based on provided lags\n", + " profilesRecentTable <- matrix(0,length(lagsModelAll),lagsModelMax,\n", + " dimnames=list(lagsModelAll,NULL));\n", + " # Create the lookup table\n", + " indexLookupTable <- matrix(1,length(lagsModelAll),obsAll+lagsModelMax,\n", + " dimnames=list(lagsModelAll,NULL));\n", + " # Modify the lookup table in order to get proper indices in C++\n", + " profileIndices <- matrix(c(1:(lagsModelMax*length(lagsModelAll))),length(lagsModelAll));\n", + "\n", + " for(i in 1:length(lagsModelAll)){\n", + " profilesRecentTable[i,1:lagsModelAll[i]] <- 1:lagsModelAll[i];\n", + " # -1 is needed to align this with C++ code\n", + " indexLookupTable[i,lagsModelMax+c(1:obsAll)] <- rep(profileIndices[i,1:lagsModelAll[i]],\n", + " ceiling(obsAll/lagsModelAll[i]))[1:obsAll] -1;\n", + " # Fix the head of the data, before the sample starts\n", + " indexLookupTable[i,1:lagsModelMax] <- tail(rep(unique(indexLookupTable[i,lagsModelMax+c(1:obsAll)]),lagsModelMax),\n", + " lagsModelMax);\n", + " }\n", + "\n", + " # Do shifts for proper lags only:\n", + " # Check lags variable for 24 / 24*7 / 24*365 / 48 / 48*7 / 48*365 / 365 / 52\n", + " # If they are there, find the DST / Leap moments\n", + " # Then amend respective lookup values of profile, shifting them around\n", + " if(any(yClasses==\"zoo\") && !is.null(yIndex) && !is.numeric(yIndex)){\n", + " # If this is weekly data, duplicate 52, when 53 is used\n", + " if(any(lags==52) && any(strftime(yIndex,format=\"%W\")==\"53\")){\n", + " shiftRows <- lagsModelAll==52;\n", + " # If the data does not start with 1, proceed\n", + " if(all(which(strftime(yIndex,format=\"%W\")==\"53\")!=1)){\n", + " indexLookupTable[shiftRows,which(strftime(yIndex,format=\"%W\")==\"53\")] <-\n", + " indexLookupTable[shiftRows,which(strftime(yIndex,format=\"%W\")==\"53\")-1];\n", + " }\n", + " }\n", + "\n", + " #### If this is daily and we have 365 days of year, locate 29th February and use 28th instead\n", + " if(any(c(365,365*48,365*24) %in% lags) && any(strftime(yIndex,format=\"%d/%m\")==\"29/02\")){\n", + " shiftValue <- c(365,365*48,365*24)[c(365,365*48,365*24) %in% lags]/365;\n", + " shiftRows <- lagsModelAll %in% c(365,365*48,365*24);\n", + " # If the data does not start with 1/24/48, proceed (otherwise we refer to negative numbers)\n", + " if(!any(which(strftime(yIndex,format=\"%d/%m\")==\"29/02\") %in% shiftValue)){\n", + " indexLookupTable[shiftRows,which(strftime(yIndex,format=\"%d/%m\")==\"29/02\")] <-\n", + " indexLookupTable[shiftRows,which(strftime(yIndex,format=\"%d/%m\")==\"29/02\")-shiftValue];\n", + " }\n", + " }\n", + "\n", + " #### If this is hourly; Locate DST and do shifts for specific observations\n", + " if(any(c(24,24*7,24*365,48,48*7,48*365) %in% lags)){\n", + " shiftRows <- lagsModelAll %in% c(24,48,24*7,48*7,24*365,48*365);\n", + " # If this is hourly data, then shift 1 hour. 
If it is halfhourly, shift 2 hours\n", + " shiftValue <- 1;\n", + " if(any(c(48,48*7,48*365) %in% lags)){\n", + " shiftValue[] <- 2;\n", + " }\n", + " # Get the start and the end of DST\n", + " dstValues <- detectdst(yIndex);\n", + " # If there are DST issues, do something\n", + " doShifts <- !is.null(dstValues) && ((nrow(dstValues$start)!=0) | (nrow(dstValues$end)!=0))\n", + " if(doShifts){\n", + " # If the start date is not positioned before the end, introduce the artificial one\n", + " if(nrow(dstValues$start)==0 ||\n", + " (nrow(dstValues$end)>0 && dstValues$start$id[1]>dstValues$end$id[1])){\n", + " dstValues$start <- rbind(data.frame(id=1,date=yIndex[1]),dstValues$start);\n", + " }\n", + " # If the end date is not present or the length of the end is not the same as the start,\n", + " # set the end of series as one\n", + " if(nrow(dstValues$end)==0 ||\n", + " nrow(dstValues$end)1){\n", + " rownames(vecG)[j+c(1:componentsNumberETSSeasonal)] <- paste0(\"gamma\",c(1:componentsNumberETSSeasonal));\n", + " }\n", + " else{\n", + " rownames(vecG)[j+1] <- \"gamma\";\n", + " }\n", + " j <- j+componentsNumberETSSeasonal;\n", + " }\n", + " }\n", + "\n", + " # ARIMA model, names for persistence\n", + " if(arimaModel){\n", + " # Remove diagonal from the ARIMA part of the matrix\n", + " matF[j+1:componentsNumberARIMA,j+1:componentsNumberARIMA] <- 0;\n", + " if(componentsNumberARIMA>1){\n", + " rownames(vecG)[j+1:componentsNumberARIMA] <- paste0(\"psi\",c(1:componentsNumberARIMA));\n", + " }\n", + " else{\n", + " rownames(vecG)[j+1:componentsNumberARIMA] <- \"psi\";\n", + " }\n", + " j <- j+componentsNumberARIMA;\n", + " }\n", + "\n", + " # Modify transition to do drift\n", + " if(!arimaModel && constantRequired){\n", + " matF[1,ncol(matF)] <- 1;\n", + " }\n", + "\n", + " # Regression, persistence\n", + " if(xregModel){\n", + " if(persistenceXregProvided && !persistenceXregEstimate){\n", + " vecG[j+1:xregNumber,] <- persistenceXreg;\n", + " }\n", + " rownames(vecG)[j+1:xregNumber] <- paste0(\"delta\",xregParametersPersistence);\n", + " }\n", + "\n", + " # Damping parameter value\n", + " if(etsModel && modelIsTrendy){\n", + " matF[1,2] <- phi;\n", + " matF[2,2] <- phi;\n", + "\n", + " matWt[,2] <- phi;\n", + " }\n", + "\n", + " # If the arma parameters were provided, fill in the persistence\n", + " if(arimaModel && (!arEstimate && !maEstimate)){\n", + " # Call polynomial\n", + " # arimaPolynomials <- polynomialiser(NULL, arOrders, iOrders, maOrders,\n", + " # arRequired, maRequired, arEstimate, maEstimate, armaParameters, lags);\n", + " arimaPolynomials <- lapply(adamPolynomialiser(0, arOrders, iOrders, maOrders,\n", + " arEstimate, maEstimate, armaParameters, lags), as.vector);\n", + " # Fill in the transition matrix\n", + " if(nrow(nonZeroARI)>0){\n", + " matF[componentsNumberETS+nonZeroARI[,2],componentsNumberETS+nonZeroARI[,2]] <-\n", + " -arimaPolynomials$ariPolynomial[nonZeroARI[,1]];\n", + " }\n", + " # Fill in the persistence vector\n", + " if(nrow(nonZeroARI)>0){\n", + " vecG[componentsNumberETS+nonZeroARI[,2]] <- -arimaPolynomials$ariPolynomial[nonZeroARI[,1]];\n", + " }\n", + " if(nrow(nonZeroMA)>0){\n", + " vecG[componentsNumberETS+nonZeroMA[,2]] <- vecG[componentsNumberETS+nonZeroMA[,2]] +\n", + " arimaPolynomials$maPolynomial[nonZeroMA[,1]];\n", + " }\n", + " }\n", + " else{\n", + " arimaPolynomials <- NULL;\n", + " }\n", + "\n", + "\n", + " if(!profilesRecentProvided){\n", + " \n", + " # ETS model, initial state\n", + " # If something needs to be estimated...\n", + " if(etsModel){\n", 
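+                    "        # Note on the block below: when the initials are estimated, seasonal\n",
+                    "        # models with at least two full seasonal cycles in-sample seed the\n",
+                    "        # states from msdecompose(); shorter samples fall back to simple\n",
+                    "        # means and first differences of yInSample.\n",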
+ " if(initialEstimate){\n", + " # For the seasonal models\n", + " if(modelIsSeasonal){\n", + " if(obsNonzero>=lagsModelMax*2){\n", + " # If either Etype or Stype are multiplicative, do multiplicative decomposition\n", + " decompositionType <- c(\"additive\",\"multiplicative\")[any(c(Etype,Stype)==\"M\")+1];\n", + " yDecomposition <- msdecompose(yInSample, lags[lags!=1], type=decompositionType,\n", + " smoother=smoother);\n", + " j <- 1;\n", + " # level\n", + " if(initialLevelEstimate){\n", + " matVt[j,1:lagsModelMax] <- yDecomposition$initial[1];\n", + " # matVt[j,1:lagsModelMax] <-\n", + " # switch(decompositionType,\n", + " # \"additive\"=yDecomposition$initial[1]-yDecomposition$initial[2]*lagsModelMax,\n", + " # \"multiplicative\"=yDecomposition$initial[1]/yDecomposition$initial[2]^lagsModelMax);\n", + " # matVt[j,1:lagsModelMax] <- mean(yInSample[1:lagsModelMax]);\n", + " if(xregModel){\n", + " if(Etype==\"A\"){\n", + " matVt[j,1:lagsModelMax] <- matVt[j,1:lagsModelMax] -\n", + " as.vector(xregModelInitials[[1]]$initialXreg %*% xregData[1,]);\n", + " }\n", + " else{\n", + " matVt[j,1:lagsModelMax] <- matVt[j,1:lagsModelMax] /\n", + " as.vector(exp(xregModelInitials[[2]]$initialXreg %*% xregData[1,]));\n", + " }\n", + " }\n", + " }\n", + " else{\n", + " matVt[j,1:lagsModelMax] <- initialLevel;\n", + " }\n", + " j <- j+1;\n", + " # If trend is needed\n", + " if(modelIsTrendy){\n", + " if(initialTrendEstimate){\n", + " if(Ttype==\"A\" && Stype==\"M\"){\n", + " # if(initialLevelEstimate){\n", + " # # level fix\n", + " # matVt[j-1,1:lagsModelMax] <- exp(mean(log(yInSample[otLogical][1:lagsModelMax])));\n", + " # }\n", + " # trend\n", + " matVt[j,1:lagsModelMax] <- prod(yDecomposition$initial)-yDecomposition$initial[1];\n", + " # If the initial trend is higher than the lowest value, initialise with zero.\n", + " # This is a failsafe mechanism for the mixed models\n", + " if(matVt[j,1]<0 && abs(matVt[j,1])>min(abs(yInSample[otLogical]))){\n", + " matVt[j,1:lagsModelMax] <- 0;\n", + " }\n", + " }\n", + " else if(Ttype==\"M\" && Stype==\"A\"){\n", + " # if(initialLevelEstimate){\n", + " # # level fix\n", + " # matVt[j-1,1:lagsModelMax] <- exp(mean(log(yInSample[otLogical][1:lagsModelMax])));\n", + " # }\n", + " # trend\n", + " matVt[j,1:lagsModelMax] <- sum(abs(yDecomposition$initial))/abs(yDecomposition$initial[1]);\n", + " }\n", + " else if(Ttype==\"M\"){\n", + " # trend is too dangerous, make it start from 1.\n", + " matVt[j,1:lagsModelMax] <- 1;\n", + " }\n", + " else{\n", + " # trend\n", + " matVt[j,1:lagsModelMax] <- yDecomposition$initial[2];\n", + " }\n", + " # This is a failsafe for multiplicative trend models, so that the thing does not explode\n", + " if(Ttype==\"M\" && any(matVt[j,1:lagsModelMax]>1.1)){\n", + " matVt[j,1:lagsModelMax] <- 1;\n", + " }\n", + " # This is a failsafe for multiplicative trend models, so that the thing does not explode\n", + " if(Ttype==\"M\" && any(matVt[1,1:lagsModelMax]<0)){\n", + " matVt[1,1:lagsModelMax] <- yInSample[otLogical][1];\n", + " }\n", + " }\n", + " else{\n", + " matVt[j,1:lagsModelMax] <- initialTrend;\n", + " }\n", + " j <- j+1;\n", + " }\n", + " #### Seasonal components\n", + " # For pure models use stuff as is\n", + " if(all(c(Etype,Stype)==\"A\") || all(c(Etype,Stype)==\"M\") ||\n", + " (Etype==\"A\" & Stype==\"M\")){\n", + " for(i in 1:componentsNumberETSSeasonal){\n", + " if(initialSeasonalEstimate[i]){\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- yDecomposition$seasonal[[i]][1:lagsModel[i+j-1]];\n", + " # Renormalise the initial 
seasons\n", + " if(Stype==\"A\"){\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <-\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] -\n", + " mean(matVt[i+j-1,1:lagsModel[i+j-1]]);\n", + " }\n", + " else{\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <-\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] /\n", + " exp(mean(log(matVt[i+j-1,1:lagsModel[i+j-1]])));\n", + " }\n", + " }\n", + " else{\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- initialSeasonal[[i]];\n", + " }\n", + " }\n", + " }\n", + " # For mixed models use a different set of initials\n", + " else if(Etype==\"M\" && Stype==\"A\"){\n", + " for(i in 1:componentsNumberETSSeasonal){\n", + " if(initialSeasonalEstimate[i]){\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <-\n", + " log(yDecomposition$seasonal[[i]][1:lagsModel[i+j-1]])*min(yInSample[otLogical]);\n", + " # Renormalise the initial seasons\n", + " if(Stype==\"A\"){\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- matVt[i+j-1,1:lagsModel[i+j-1]] -\n", + " mean(matVt[i+j-1,1:lagsModel[i+j-1]]);\n", + " }\n", + " else{\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- matVt[i+j-1,1:lagsModel[i+j-1]] /\n", + " exp(mean(log(matVt[i+j-1,1:lagsModel[i+j-1]])));\n", + " }\n", + " }\n", + " else{\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- initialSeasonal[[i]];\n", + " }\n", + " }\n", + " }\n", + " }\n", + " else{\n", + " j <- 1;\n", + " # level\n", + " if(initialLevelEstimate){\n", + " matVt[j,1:lagsModelMax] <- mean(yInSample[1:lagsModelMax]);\n", + " if(xregModel){\n", + " if(Etype==\"A\"){\n", + " matVt[j,1:lagsModelMax] <- matVt[j,1:lagsModelMax] -\n", + " as.vector(xregModelInitials[[1]]$initialXreg %*% xregData[1,]);\n", + " }\n", + " else{\n", + " matVt[j,1:lagsModelMax] <- matVt[j,1:lagsModelMax] /\n", + " as.vector(exp(xregModelInitials[[2]]$initialXreg %*% xregData[1,]));\n", + " }\n", + " }\n", + " }\n", + " else{\n", + " matVt[j,1:lagsModelMax] <- initialLevel;\n", + " }\n", + " j <- j+1;\n", + " if(modelIsTrendy){\n", + " if(initialTrendEstimate){\n", + " if(Ttype==\"A\"){\n", + " # trend\n", + " matVt[j,1:lagsModelMax] <- yInSample[2]-yInSample[1];\n", + " }\n", + " else if(Ttype==\"M\"){\n", + " if(initialLevelEstimate){\n", + " # level fix\n", + " matVt[j-1,1:lagsModelMax] <- exp(mean(log(yInSample[otLogical][1:lagsModelMax])));\n", + " }\n", + " # trend\n", + " matVt[j,1:lagsModelMax] <- yInSample[2]/yInSample[1];\n", + " }\n", + " # This is a failsafe for multiplicative trend models, so that the thing does not explode\n", + " if(Ttype==\"M\" && any(matVt[j,1:lagsModelMax]>1.1)){\n", + " matVt[j,1:lagsModelMax] <- 1;\n", + " }\n", + " }\n", + " else{\n", + " matVt[j,1:lagsModelMax] <- initialTrend;\n", + " }\n", + "\n", + " # Do roll back. 
Especially useful for backcasting and multisteps\n", + " if(Ttype==\"A\"){\n", + " matVt[j-1,1:lagsModelMax] <- matVt[j-1,1] - matVt[j,1]*lagsModelMax;\n", + " }\n", + " else if(Ttype==\"M\"){\n", + " matVt[j-1,1:lagsModelMax] <- matVt[j-1,1] / matVt[j,1]^lagsModelMax;\n", + " }\n", + " j <- j+1;\n", + " }\n", + " #### Seasonal components\n", + " # For pure models use stuff as is\n", + " if(Stype==\"A\"){\n", + " for(i in 1:componentsNumberETSSeasonal){\n", + " if(initialSeasonalEstimate[i]){\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- yInSample[1:lagsModel[i+j-1]]-matVt[1,1];\n", + " # Renormalise the initial seasons\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- matVt[i+j-1,1:lagsModel[i+j-1]] -\n", + " mean(matVt[i+j-1,1:lagsModel[i+j-1]]);\n", + " }\n", + " else{\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- initialSeasonal[[i]];\n", + " }\n", + " }\n", + " }\n", + " # For mixed models use a different set of initials\n", + " else{\n", + " for(i in 1:componentsNumberETSSeasonal){\n", + " if(initialSeasonalEstimate[i]){\n", + " # abs() is needed for mixed ETS+ARIMA\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- yInSample[1:lagsModel[i+j-1]]/abs(matVt[1,1]);\n", + " # Renormalise the initial seasons\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- matVt[i+j-1,1:lagsModel[i+j-1]] /\n", + " exp(mean(log(matVt[i+j-1,1:lagsModel[i+j-1]])));\n", + " }\n", + " else{\n", + " matVt[i+j-1,1:lagsModel[i+j-1]] <- initialSeasonal[[i]];\n", + " }\n", + " }\n", + " }\n", + " }\n", + " }\n", + " # Non-seasonal models\n", + " else{\n", + " # level\n", + " if(initialLevelEstimate){\n", + " matVt[1,1:lagsModelMax] <- mean(yInSample[1:max(lagsModelMax,ceiling(obsInSample*0.2))]);\n", + " # if(xregModel){\n", + " # if(Etype==\"A\"){\n", + " # matVt[1,1:lagsModelMax] <- matVt[1,lagsModelMax] -\n", + " # as.vector(xregModelInitials[[1]]$initialXreg %*% xregData[1,]);\n", + " # }\n", + " # else{\n", + " # matVt[1,1:lagsModelMax] <- matVt[1,lagsModelMax] /\n", + " # as.vector(exp(xregModelInitials[[2]]$initialXreg %*% xregData[1,]));\n", + " # }\n", + " # }\n", + " }\n", + " else{\n", + " matVt[1,1:lagsModelMax] <- initialLevel;\n", + " }\n", + " if(modelIsTrendy){\n", + " if(initialTrendEstimate){\n", + " matVt[2,1:lagsModelMax] <- switch(Ttype,\n", + " \"A\" = mean(diff(yInSample[1:max(lagsModelMax+1,\n", + " ceiling(obsInSample*0.2))]),\n", + " na.rm=TRUE),\n", + " \"M\" = exp(mean(diff(log(yInSample[otLogical])),na.rm=TRUE)));\n", + " }\n", + " else{\n", + " matVt[2,1:lagsModelMax] <- initialTrend;\n", + " }\n", + " }\n", + " }\n", + "\n", + " if(initialLevelEstimate && Etype==\"M\" && matVt[1,lagsModelMax]==0){\n", + " matVt[1,1:lagsModelMax] <- mean(yInSample);\n", + " }\n", + " }\n", + " # Else, insert the provided ones... 
make sure that this is not a backcasting\n", + " else if(!initialEstimate && initialType==\"provided\"){\n", + " j <- 1;\n", + " matVt[j,1:lagsModelMax] <- initialLevel;\n", + " if(modelIsTrendy){\n", + " j <- j+1;\n", + " matVt[j,1:lagsModelMax] <- initialTrend;\n", + " }\n", + " if(modelIsSeasonal){\n", + " for(i in 1:componentsNumberETSSeasonal){\n", + " # This is misaligned, but that's okay, because this goes directly to profileRecent\n", + " # matVt[j+i,(lagsModelMax-lagsModel[j+i])+1:lagsModel[j+i]] <- initialSeasonal[[i]];\n", + " matVt[j+i,1:lagsModel[j+i]] <- initialSeasonal[[i]];\n", + " }\n", + " }\n", + " j <- j+componentsNumberETSSeasonal;\n", + " }\n", + " }\n", + "\n", + " # If ARIMA orders are specified, prepare initials\n", + " if(arimaModel){\n", + " if(initialArimaEstimate){\n", + " matVt[componentsNumberETS+1:componentsNumberARIMA, 1:initialArimaNumber] <-\n", + " switch(Etype, \"A\"=0, \"M\"=1);\n", + " if(any(lags>1)){\n", + " yDecomposition <- tail(msdecompose(yInSample,\n", + " lags[lags!=1],\n", + " type=switch(Etype,\n", + " \"A\"=\"additive\",\n", + " \"M\"=\"multiplicative\"))$seasonal,1)[[1]];\n", + " }\n", + " else{\n", + " yDecomposition <- switch(Etype,\n", + " \"A\"=mean(diff(yInSample[otLogical])),\n", + " \"M\"=exp(mean(diff(log(yInSample[otLogical])))));\n", + " }\n", + " matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber] <-\n", + " rep(yDecomposition,ceiling(initialArimaNumber/max(lags)))[1:initialArimaNumber];\n", + " # rep(yInSample[1:initialArimaNumber],each=componentsNumberARIMA);\n", + "\n", + " # Failsafe mechanism in case the sample is too small\n", + " # matVt[is.na(matVt)] <- switch(Etype, \"A\"=0, \"M\"=1);\n", + "\n", + " # If this is just ARIMA with optimisation, refine the initials\n", + " # if(!etsModel && initialType!=\"complete\"){\n", + " # arimaPolynomials <- polynomialiser(rep(0.1,sum(c(arOrders,maOrders))), arOrders, iOrders, maOrders,\n", + " # arRequired, maRequired, arEstimate, maEstimate, armaParameters, lags);\n", + " # if(nrow(nonZeroARI)>0 && nrow(nonZeroARI)>=nrow(nonZeroMA)){\n", + " # matVt[componentsNumberETS+nonZeroARI[,2],\n", + " # 1:initialArimaNumber] <-\n", + " # switch(Etype,\n", + " # \"A\"=arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%\n", + " # t(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber]) /\n", + " # tail(arimaPolynomials$ariPolynomial,1),\n", + " # \"M\"=exp(arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%\n", + " # t(log(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber])) /\n", + " # tail(arimaPolynomials$ariPolynomial,1)));\n", + " # }\n", + " # else{\n", + " # matVt[componentsNumberETS+nonZeroMA[,2],\n", + " # 1:initialArimaNumber] <-\n", + " # switch(Etype,\n", + " # \"A\"=arimaPolynomials$maPolynomial[nonZeroMA[,1]] %*%\n", + " # t(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber]) /\n", + " # tail(arimaPolynomials$maPolynomial,1),\n", + " # \"M\"=exp(arimaPolynomials$maPolynomial[nonZeroMA[,1]] %*%\n", + " # t(log(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber])) /\n", + " # tail(arimaPolynomials$maPolynomial,1)));\n", + " # }\n", + " # }\n", + " }\n", + " else{\n", + " # Fill in the matrix with 0 / 1, just in case if the state will not be updated anymore\n", + " matVt[componentsNumberETS+1:componentsNumberARIMA, 1:initialArimaNumber] <-\n", + " switch(Etype, \"A\"=0, \"M\"=1);\n", + " # Insert the provided initials\n", + " matVt[componentsNumberETS+componentsNumberARIMA, 
1:initialArimaNumber] <-\n", + " initialArima[1:initialArimaNumber];\n", + "\n", + " # matVt[componentsNumberETS+nonZeroARI[,2], 1:initialArimaNumber] <-\n", + " # switch(Etype,\n", + " # \"A\"=arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*% t(initialArima[1:initialArimaNumber]) /\n", + " # tail(arimaPolynomials$ariPolynomial,1),\n", + " # \"M\"=exp(arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*% t(log(initialArima[1:initialArimaNumber])) /\n", + " # tail(arimaPolynomials$ariPolynomial,1)));\n", + "\n", + " # If only AR is needed, but provided or if both are needed, but provided\n", + " # if(((arRequired && !arEstimate) && !maRequired) ||\n", + " # ((arRequired && !arEstimate) && (maRequired && !maEstimate)) ||\n", + " # (iRequired && !arEstimate && !maEstimate)){\n", + " # matVt[componentsNumberETS+nonZeroARI[,2],1:initialArimaNumber] <-\n", + " # switch(Etype,\n", + " # \"A\"=arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%\n", + " # t(initialArima[1:initialArimaNumber]) /\n", + " # tail(arimaPolynomials$ariPolynomial,1),\n", + " # \"M\"=exp(arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%\n", + " # t(log(initialArima[1:initialArimaNumber])) /\n", + " # tail(arimaPolynomials$ariPolynomial,1)));\n", + " # }\n", + " # If only MA is needed, but provided\n", + " # else if(((maRequired && !maEstimate) && !arRequired)){\n", + " # matVt[componentsNumberETS+nonZeroMA[,2],1:initialArimaNumber] <-\n", + " # switch(Etype,\n", + " # \"A\"=arimaPolynomials$maPolynomial[nonZeroMA[,1]] %*%\n", + " # t(initialArima[1:initialArimaNumber]) /\n", + " # tail(arimaPolynomials$maPolynomial,1),\n", + " # \"M\"=exp(arimaPolynomials$maPolynomial[nonZeroMA[,1]] %*%\n", + " # t(log(initialArima[1:initialArimaNumber])) /\n", + " # tail(arimaPolynomials$maPolynomial,1)));\n", + " # }\n", + " }\n", + " }\n", + "\n", + " # Fill in the initials for xreg\n", + " if(xregModel){\n", + " if(Etype==\"A\" || initialXregProvided || is.null(xregModelInitials[[2]])){\n", + " matVt[componentsNumberETS+componentsNumberARIMA+1:xregNumber,\n", + " 1:lagsModelMax] <- xregModelInitials[[1]]$initialXreg;\n", + " }\n", + " else{\n", + " matVt[componentsNumberETS+componentsNumberARIMA+1:xregNumber,\n", + " 1:lagsModelMax] <- xregModelInitials[[2]]$initialXreg;\n", + " }\n", + " }\n", + "\n", + " # Add constant if needed\n", + " if(constantRequired){\n", + " if(constantEstimate){\n", + " # Add the mean of data\n", + " if(sum(iOrders)==0 && !etsModel){\n", + " matVt[componentsNumberETS+componentsNumberARIMA+xregNumber+1,] <- mean(yInSample[otLogical]);\n", + " }\n", + " # Add first differences\n", + " else{\n", + " matVt[componentsNumberETS+componentsNumberARIMA+xregNumber+1,] <-\n", + " switch(Etype,\n", + " \"A\"=mean(diff(yInSample[otLogical])),\n", + " \"M\"=exp(mean(diff(log(yInSample[otLogical])))));\n", + " }\n", + " }\n", + " else{\n", + " matVt[componentsNumberETS+componentsNumberARIMA+xregNumber+1,] <- constantValue;\n", + " }\n", + " # If ETS model is used, change the initial level\n", + " if(etsModel && initialLevelEstimate){\n", + " if(Etype==\"A\"){\n", + " matVt[1,1:lagsModelMax] <- matVt[1,1:lagsModelMax] -\n", + " matVt[componentsNumberETS+componentsNumberARIMA+xregNumber+1,1];\n", + " }\n", + " else{\n", + " matVt[1,1:lagsModelMax] <- matVt[1,1:lagsModelMax] /\n", + " matVt[componentsNumberETS+componentsNumberARIMA+xregNumber+1,1];\n", + " }\n", + " }\n", + " # If ARIMA is done, debias states\n", + " if(arimaModel && initialArimaEstimate){\n", + " if(Etype==\"A\"){\n", + " 
matVt[componentsNumberETS+nonZeroARI[,2],1:initialArimaNumber] <-\n", + " matVt[componentsNumberETS+nonZeroARI[,2],1:initialArimaNumber] -\n", + " matVt[componentsNumberETS+componentsNumberARIMA+xregNumber+1,1];\n", + " }\n", + " else{\n", + " matVt[componentsNumberETS+nonZeroARI[,2],1:initialArimaNumber] <-\n", + " matVt[componentsNumberETS+nonZeroARI[,2],1:initialArimaNumber] /\n", + " matVt[componentsNumberETS+componentsNumberARIMA+xregNumber+1,1];\n", + " }\n", + " }\n", + " }\n", + " }\n", + " else{\n", + " matVt[,1:lagsModelMax] <- profilesRecentTable;\n", + " }\n", + "\n", + " return(list(matVt=matVt, matWt=matWt, matF=matF, vecG=vecG, arimaPolynomials=arimaPolynomials));\n", + " }\n", + "\n", + "adamCreated <- creator(etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal,\n", + " lags, lagsModel, lagsModelARIMA, lagsModelAll, lagsModelMax,\n", + " profilesRecentTable, profilesRecentProvided,\n", + " obsStates, obsInSample, obsAll, componentsNumberETS, componentsNumberETSSeasonal,\n", + " componentsNamesETS, otLogical, yInSample,\n", + " persistence, persistenceEstimate,\n", + " persistenceLevel, persistenceLevelEstimate, persistenceTrend, persistenceTrendEstimate,\n", + " persistenceSeasonal, persistenceSeasonalEstimate,\n", + " persistenceXreg, persistenceXregEstimate, persistenceXregProvided,\n", + " phi,\n", + " initialType, initialEstimate,\n", + " initialLevel, initialLevelEstimate, initialTrend, initialTrendEstimate,\n", + " initialSeasonal, initialSeasonalEstimate,\n", + " initialArima, initialArimaEstimate, initialArimaNumber,\n", + " initialXregEstimate, initialXregProvided,\n", + " arimaModel, arRequired, iRequired, maRequired, armaParameters,\n", + " arOrders, iOrders, maOrders,\n", + " componentsNumberARIMA, componentsNamesARIMA,\n", + " xregModel, xregModelInitials, xregData, xregNumber, xregNames,\n", + " xregParametersPersistence,\n", + " constantRequired, constantEstimate, constantValue, constantName)\n", + "\n", + "adamCreated$matVt\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "$B\n", + "alpha level \n", + " 0.1 176.4 \n", + "\n", + "$Bl\n", + "[1] 0 -Inf\n", + "\n", + "$Bu\n", + "[1] 1 Inf\n", + "\n" + ] + } + ], + "source": [ + "%%R\n", + "\n", + "initialiser <- function(etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal,\n", + " componentsNumberETSNonSeasonal, componentsNumberETSSeasonal, componentsNumberETS,\n", + " lags, lagsModel, lagsModelSeasonal, lagsModelARIMA, lagsModelMax,\n", + " matVt,\n", + " # persistence values\n", + " persistenceEstimate, persistenceLevelEstimate, persistenceTrendEstimate,\n", + " persistenceSeasonalEstimate, persistenceXregEstimate,\n", + " # initials\n", + " phiEstimate, initialType, initialEstimate,\n", + " initialLevelEstimate, initialTrendEstimate, initialSeasonalEstimate,\n", + " initialArimaEstimate, initialXregEstimate,\n", + " # ARIMA elements\n", + " arimaModel, arRequired, maRequired, arEstimate, maEstimate, arOrders, maOrders,\n", + " componentsNumberARIMA, componentsNamesARIMA, initialArimaNumber,\n", + " # Explanatory variables\n", + " xregModel, xregNumber,\n", + " xregParametersEstimated, xregParametersPersistence,\n", + " # Constant and other stuff\n", + " constantEstimate, constantName, otherParameterEstimate){\n", + " # The vector of logicals for persistence elements\n", + " persistenceEstimateVector <- c(persistenceLevelEstimate,modelIsTrendy&persistenceTrendEstimate,\n", + " 
modelIsSeasonal&persistenceSeasonalEstimate);\n", + "\n", + " # The order:\n", + " # Persistence of states and for xreg, phi, AR and MA parameters, initials, initialsARIMA, initials for xreg\n", + " B <- Bl <- Bu <- vector(\"numeric\",\n", + " # Values of the persistence vector + phi\n", + " etsModel*(persistenceLevelEstimate + modelIsTrendy*persistenceTrendEstimate +\n", + " modelIsSeasonal*sum(persistenceSeasonalEstimate) + phiEstimate) +\n", + " xregModel*persistenceXregEstimate*max(xregParametersPersistence) +\n", + " # AR and MA values\n", + " arimaModel*(arEstimate*sum(arOrders)+maEstimate*sum(maOrders)) +\n", + " # initials of ETS\n", + " etsModel*all(initialType!=c(\"complete\",\"backcasting\"))*\n", + " (initialLevelEstimate +\n", + " (modelIsTrendy*initialTrendEstimate) +\n", + " (modelIsSeasonal*sum(initialSeasonalEstimate*(lagsModelSeasonal-1)))) +\n", + " # initials of ARIMA\n", + " all(initialType!=c(\"complete\",\"backcasting\"))*arimaModel*initialArimaNumber*initialArimaEstimate +\n", + " # initials of xreg\n", + " (initialType!=\"complete\")*xregModel*initialXregEstimate*sum(xregParametersEstimated) +\n", + " constantEstimate + otherParameterEstimate);\n", + "\n", + " j <- 0;\n", + " if(etsModel){\n", + " # Fill in persistence\n", + " if(persistenceEstimate && any(persistenceEstimateVector)){\n", + " if(any(c(Etype,Ttype,Stype)==\"M\")){\n", + " # A special type of model which is not safe: AAM, MAA, MAM\n", + " if((Etype==\"A\" && Ttype==\"A\" && Stype==\"M\") || (Etype==\"A\" && Ttype==\"M\" && Stype==\"A\") ||\n", + " (any(initialType==c(\"complete\",\"backcasting\")) &&\n", + " ((Etype==\"M\" && Ttype==\"A\" && Stype==\"A\") || (Etype==\"M\" && Ttype==\"A\" && Stype==\"M\")))){\n", + " B[1:sum(persistenceEstimateVector)] <-\n", + " c(0.01,0,rep(0,componentsNumberETSSeasonal))[which(persistenceEstimateVector)];\n", + " }\n", + " # MMA is the worst. 
Set everything to zero and see if anything can be done...\n", + " else if((Etype==\"M\" && Ttype==\"M\" && Stype==\"A\")){\n", + " B[1:sum(persistenceEstimateVector)] <-\n", + " c(0,0,rep(0,componentsNumberETSSeasonal))[which(persistenceEstimateVector)];\n", + " }\n", + " else if(Etype==\"M\" && Ttype==\"A\"){\n", + " if(any(initialType==c(\"complete\",\"backcasting\"))){\n", + " B[1:sum(persistenceEstimateVector)] <-\n", + " c(0.1,0,rep(0.3,componentsNumberETSSeasonal))[which(persistenceEstimateVector)];\n", + " }\n", + " else{\n", + " B[1:sum(persistenceEstimateVector)] <-\n", + " c(0.2,0.01,rep(0.3,componentsNumberETSSeasonal))[which(persistenceEstimateVector)];\n", + " }\n", + " }\n", + " else if(Etype==\"M\" && Ttype==\"M\"){\n", + " B[1:sum(persistenceEstimateVector)] <-\n", + " c(0.1,0.05,rep(0.3,componentsNumberETSSeasonal))[which(persistenceEstimateVector)];\n", + " }\n", + " else{\n", + " B[1:sum(persistenceEstimateVector)] <-\n", + " c(0.1,0.05,rep(0.3,componentsNumberETSSeasonal))[which(persistenceEstimateVector)];\n", + " }\n", + " }\n", + " else{\n", + " B[1:sum(persistenceEstimateVector)] <-\n", + " c(0.1,0.05,rep(0.3,componentsNumberETSSeasonal))[which(persistenceEstimateVector)];\n", + " }\n", + " if(bounds==\"usual\"){\n", + " Bl[1:sum(persistenceEstimateVector)] <- rep(0, sum(persistenceEstimateVector));\n", + " Bu[1:sum(persistenceEstimateVector)] <- rep(1, sum(persistenceEstimateVector));\n", + " }\n", + " else{\n", + " Bl[1:sum(persistenceEstimateVector)] <- rep(-5, sum(persistenceEstimateVector));\n", + " Bu[1:sum(persistenceEstimateVector)] <- rep(5, sum(persistenceEstimateVector));\n", + " }\n", + " # Names for B\n", + " if(persistenceLevelEstimate){\n", + " j[] <- j+1\n", + " names(B)[j] <- \"alpha\";\n", + " }\n", + " if(modelIsTrendy && persistenceTrendEstimate){\n", + " j[] <- j+1\n", + " names(B)[j] <- \"beta\";\n", + " }\n", + " if(modelIsSeasonal && any(persistenceSeasonalEstimate)){\n", + " if(componentsNumberETSSeasonal>1){\n", + " names(B)[j+c(1:sum(persistenceSeasonalEstimate))] <-\n", + " paste0(\"gamma\",c(1:componentsNumberETSSeasonal));\n", + " }\n", + " else{\n", + " names(B)[j+1] <- \"gamma\";\n", + " }\n", + " j[] <- j+sum(persistenceSeasonalEstimate);\n", + " }\n", + " }\n", + " }\n", + "\n", + " # Persistence if xreg is provided\n", + " if(xregModel && persistenceXregEstimate){\n", + " xregPersistenceNumber <- max(xregParametersPersistence);\n", + " B[j+1:xregPersistenceNumber] <- rep(switch(Etype,\"A\"=0.01,\"M\"=0),xregPersistenceNumber);\n", + " Bl[j+1:xregPersistenceNumber] <- rep(-5, xregPersistenceNumber);\n", + " Bu[j+1:xregPersistenceNumber] <- rep(5, xregPersistenceNumber);\n", + " names(B)[j+1:xregPersistenceNumber] <- paste0(\"delta\",c(1:xregPersistenceNumber));\n", + " j[] <- j+xregPersistenceNumber;\n", + " }\n", + "\n", + " # Damping parameter\n", + " if(etsModel && phiEstimate){\n", + " j[] <- j+1;\n", + " B[j] <- 0.95;\n", + " names(B)[j] <- \"phi\";\n", + " Bl[j] <- 0;\n", + " Bu[j] <- 1;\n", + " }\n", + "\n", + " # ARIMA parameters (AR / MA)\n", + " if(arimaModel){\n", + " # These are filled in lags-wise\n", + " if(any(c(arEstimate,maEstimate))){\n", + " acfValues <- rep(-0.1, maOrders %*% lags);\n", + " pacfValues <- rep(0.1, arOrders %*% lags);\n", + " # If this is ETS + ARIMA model or no differences model, then don't bother with initials\n", + " # The latter does not make sense because of non-stationarity in ACF / PACF\n", + " # Otherwise use ACF / PACF values as starting parameters for ARIMA\n", + " if(!(etsModel || 
all(iOrders==0))){\n", + " yDifferenced <- yInSample;\n", + " # If the model has differences, take them\n", + " if(any(iOrders>0)){\n", + " for(i in 1:length(iOrders)){\n", + " if(iOrders[i]>0){\n", + " yDifferenced <- diff(yDifferenced,lag=lags[i],differences=iOrders[i]);\n", + " }\n", + " }\n", + " }\n", + " # Do ACF/PACF initialisation only for non-seasonal models\n", + " if(all(lags<=1)){\n", + " if(maRequired && maEstimate){\n", + " # If the sample is smaller than lags, it will be substituted by default values\n", + " acfValues[1:min(maOrders %*% lags, length(yDifferenced)-1)] <-\n", + " acf(yDifferenced,lag.max=max(1,maOrders %*% lags),plot=FALSE)$acf[-1];\n", + " }\n", + " if(arRequired && arEstimate){\n", + " # If the sample is smaller than lags, it will be substituted by default values\n", + " pacfValues[1:min(arOrders %*% lags, length(yDifferenced)-1)] <-\n", + " pacf(yDifferenced,lag.max=max(1,arOrders %*% lags),plot=FALSE)$acf;\n", + " }\n", + " }\n", + " }\n", + " for(i in 1:length(lags)){\n", + " if(arRequired && arEstimate && arOrders[i]>0){\n", + " if(all(!is.nan(pacfValues[c(1:arOrders[i])*lags[i]]))){\n", + " B[j+c(1:arOrders[i])] <- pacfValues[c(1:arOrders[i])*lags[i]];\n", + " }\n", + " else{\n", + " B[j+c(1:arOrders[i])] <- 0.1;\n", + " }\n", + " if(sum(B[j+c(1:arOrders[i])])>1){\n", + " B[j+c(1:arOrders[i])] <- B[j+c(1:arOrders[i])] / sum(B[j+c(1:arOrders[i])]) - 0.01;\n", + " }\n", + " # B[j+c(1:arOrders[i])] <- rep(0.1,arOrders[i]);\n", + " Bl[j+c(1:arOrders[i])] <- -5;\n", + " Bu[j+c(1:arOrders[i])] <- 5;\n", + " names(B)[j+1:arOrders[i]] <- paste0(\"phi\",1:arOrders[i],\"[\",lags[i],\"]\");\n", + " j[] <- j + arOrders[i];\n", + " }\n", + " if(maRequired && maEstimate && maOrders[i]>0){\n", + " if(all(!is.nan(acfValues[c(1:maOrders[i])*lags[i]]))){\n", + " B[j+c(1:maOrders[i])] <- acfValues[c(1:maOrders[i])*lags[i]];\n", + " }\n", + " else{\n", + " B[j+c(1:maOrders[i])] <- 0.1;\n", + " }\n", + " if(sum(B[j+c(1:maOrders[i])])>1){\n", + " B[j+c(1:maOrders[i])] <- B[j+c(1:maOrders[i])] / sum(B[j+c(1:maOrders[i])]) - 0.01;\n", + " }\n", + " # B[j+c(1:maOrders[i])] <- rep(-0.1,maOrders[i]);\n", + " Bl[j+c(1:maOrders[i])] <- -5;\n", + " Bu[j+c(1:maOrders[i])] <- 5;\n", + " names(B)[j+1:maOrders[i]] <- paste0(\"theta\",1:maOrders[i],\"[\",lags[i],\"]\");\n", + " j[] <- j + maOrders[i];\n", + " }\n", + " }\n", + " }\n", + "\n", + " arimaPolynomials <- lapply(adamPolynomialiser(B[j+1:sum(c(arOrders*arEstimate,maOrders*maEstimate))],\n", + " arOrders, iOrders, maOrders,\n", + " arEstimate, maEstimate, armaParameters, lags), as.vector)\n", + " }\n", + "\n", + " # Initials\n", + " if(etsModel && all(initialType!=c(\"complete\",\"backcasting\")) && initialEstimate){\n", + " if(initialLevelEstimate){\n", + " j[] <- j+1;\n", + " B[j] <- matVt[1,1];\n", + " names(B)[j] <- \"level\";\n", + " if(Etype==\"A\"){\n", + " Bl[j] <- -Inf;\n", + " Bu[j] <- Inf;\n", + " }\n", + " else{\n", + " Bl[j] <- 0;\n", + " Bu[j] <- Inf;\n", + " }\n", + " }\n", + " if(modelIsTrendy && initialTrendEstimate){\n", + " j[] <- j+1;\n", + " B[j] <- matVt[2,1];\n", + " names(B)[j] <- \"trend\";\n", + " if(Ttype==\"A\"){\n", + " Bl[j] <- -Inf;\n", + " Bu[j] <- Inf;\n", + " }\n", + " else{\n", + " Bl[j] <- 0;\n", + " # 2 is already too much for the multiplicative model\n", + " Bu[j] <- 2;\n", + " }\n", + " }\n", + " if(modelIsSeasonal && any(initialSeasonalEstimate)){\n", + " if(componentsNumberETSSeasonal>1){\n", + " for(k in 1:componentsNumberETSSeasonal){\n", + " if(initialSeasonalEstimate[k]){\n", + " # -1 
is needed in order to remove the redundant seasonal element (normalisation)\n", + " B[j+2:lagsModel[componentsNumberETSNonSeasonal+k]-1] <-\n", + " matVt[componentsNumberETSNonSeasonal+k, 2:lagsModel[componentsNumberETSNonSeasonal+k]-1];\n", + " names(B)[j+2:(lagsModel[componentsNumberETSNonSeasonal+k])-1] <-\n", + " paste0(\"seasonal\",k,\"_\",2:lagsModel[componentsNumberETSNonSeasonal+k]-1);\n", + " if(Stype==\"A\"){\n", + " Bl[j+2:lagsModel[componentsNumberETSNonSeasonal+k]-1] <- -Inf;\n", + " Bu[j+2:lagsModel[componentsNumberETSNonSeasonal+k]-1] <- Inf;\n", + " }\n", + " else{\n", + " Bl[j+2:lagsModel[componentsNumberETSNonSeasonal+k]-1] <- 0;\n", + " Bu[j+2:lagsModel[componentsNumberETSNonSeasonal+k]-1] <- Inf;\n", + " }\n", + " j[] <- j+(lagsModelSeasonal[k]-1);\n", + " }\n", + " }\n", + " }\n", + " else{\n", + " # -1 is needed in order to remove the redundant seasonal element (normalisation)\n", + " B[j+2:(lagsModel[componentsNumberETS])-1] <- matVt[componentsNumberETS,2:lagsModel[componentsNumberETS]-1];\n", + " names(B)[j+2:(lagsModel[componentsNumberETS])-1] <- paste0(\"seasonal_\",2:lagsModel[componentsNumberETS]-1);\n", + " if(Stype==\"A\"){\n", + " Bl[j+2:(lagsModel[componentsNumberETS])-1] <- -Inf;\n", + " Bu[j+2:(lagsModel[componentsNumberETS])-1] <- Inf;\n", + " }\n", + " else{\n", + " Bl[j+2:(lagsModel[componentsNumberETS])-1] <- 0;\n", + " Bu[j+2:(lagsModel[componentsNumberETS])-1] <- Inf;\n", + " }\n", + " j[] <- j+(lagsModel[componentsNumberETS]-1);\n", + " }\n", + " }\n", + " }\n", + "\n", + " # ARIMA initials\n", + " if(arimaModel && all(initialType!=c(\"complete\",\"backcasting\")) && initialArimaEstimate){\n", + " B[j+1:initialArimaNumber] <- head(matVt[componentsNumberETS+componentsNumberARIMA,1:lagsModelMax],initialArimaNumber);\n", + " names(B)[j+1:initialArimaNumber] <- paste0(\"ARIMAState\",1:initialArimaNumber);\n", + "\n", + " # Fix initial state if the polynomial is not zero\n", + " if(tail(arimaPolynomials$ariPolynomial,1)!=0){\n", + " B[j+1:initialArimaNumber] <- B[j+1:initialArimaNumber] / tail(arimaPolynomials$ariPolynomial,1);\n", + " }\n", + "\n", + " if(Etype==\"A\"){\n", + " Bl[j+1:initialArimaNumber] <- -Inf;\n", + " Bu[j+1:initialArimaNumber] <- Inf;\n", + " }\n", + " else{\n", + " # Make sure that ARIMA states are positive to avoid errors\n", + " B[j+1:initialArimaNumber] <- abs(B[j+1:initialArimaNumber]);\n", + " Bl[j+1:initialArimaNumber] <- 0;\n", + " Bu[j+1:initialArimaNumber] <- Inf;\n", + " }\n", + " j[] <- j+initialArimaNumber;\n", + " }\n", + "\n", + " # Initials of the xreg\n", + " if(initialType!=\"complete\" && initialXregEstimate){\n", + " xregNumberToEstimate <- sum(xregParametersEstimated);\n", + " B[j+1:xregNumberToEstimate] <- matVt[componentsNumberETS+componentsNumberARIMA+\n", + " which(xregParametersEstimated==1),1];\n", + " names(B)[j+1:xregNumberToEstimate] <- rownames(matVt)[componentsNumberETS+componentsNumberARIMA+\n", + " which(xregParametersEstimated==1)];\n", + " if(Etype==\"A\"){\n", + " Bl[j+1:xregNumberToEstimate] <- -Inf;\n", + " Bu[j+1:xregNumberToEstimate] <- Inf;\n", + " }\n", + " else{\n", + " Bl[j+1:xregNumberToEstimate] <- -Inf;\n", + " Bu[j+1:xregNumberToEstimate] <- Inf;\n", + " }\n", + " j[] <- j+xregNumberToEstimate;\n", + " }\n", + "\n", + " if(constantEstimate){\n", + " j[] <- j+1;\n", + " B[j] <- matVt[componentsNumberETS+componentsNumberARIMA+xregNumber+1,1];\n", + " names(B)[j] <- constantName;\n", + " if(etsModel || sum(iOrders)!=0){\n", + " if(Etype==\"A\"){\n", + " Bu[j] <- 
quantile(diff(yInSample[otLogical]),0.6);\n", + " Bl[j] <- -Bu[j];\n", + " }\n", + " else{\n", + " Bu[j] <- exp(quantile(diff(log(yInSample[otLogical])),0.6));\n", + " Bl[j] <- exp(quantile(diff(log(yInSample[otLogical])),0.4));\n", + " }\n", + "\n", + " # Failsafe for weird cases, when upper bound is the same or lower than the lower one\n", + " if(Bu[j]<=Bl[j]){\n", + " Bu[j] <- Inf;\n", + " Bl[j] <- switch(Etype,\"A\"=-Inf,\"M\"=0);\n", + " }\n", + "\n", + " # Failsafe for cases, when the B is outside of bounds\n", + " if(B[j]<=Bl[j]){\n", + " Bl[j] <- switch(Etype,\"A\"=-Inf,\"M\"=0);\n", + " }\n", + " if(B[j]>=Bu[j]){\n", + " Bu[j] <- Inf;\n", + " }\n", + " }\n", + " else{\n", + " # if(Etype==\"A\"){\n", + " # B[j]*1.01 is needed to make sure that the bounds cover the initial value\n", + " Bu[j] <- max(abs(yInSample[otLogical]),abs(B[j])*1.01);\n", + " Bl[j] <- -Bu[j];\n", + " # }\n", + " # else{\n", + " # Bu[j] <- 1.5;\n", + " # Bl[j] <- 0.1;\n", + " # }\n", + " # If this is just a constant\n", + " }\n", + " }\n", + "\n", + " # Add lambda if it is needed\n", + " if(otherParameterEstimate){\n", + " j[] <- j+1;\n", + " B[j] <- other;\n", + " names(B)[j] <- \"other\";\n", + " Bl[j] <- 1e-10;\n", + " Bu[j] <- Inf;\n", + " }\n", + "\n", + " return(list(B=B,Bl=Bl,Bu=Bu));\n", + " }\n", + "\n", + "BValues <- initialiser(etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal,\n", + " componentsNumberETSNonSeasonal, componentsNumberETSSeasonal, componentsNumberETS,\n", + " lags, lagsModel, lagsModelSeasonal, lagsModelARIMA, lagsModelMax,\n", + " adamCreated$matVt,\n", + " persistenceEstimate, persistenceLevelEstimate, persistenceTrendEstimate,\n", + " persistenceSeasonalEstimate, persistenceXregEstimate,\n", + " phiEstimate, initialType, initialEstimate,\n", + " initialLevelEstimate, initialTrendEstimate, initialSeasonalEstimate,\n", + " initialArimaEstimate, initialXregEstimate,\n", + " arimaModel, arRequired, maRequired, arEstimate, maEstimate, arOrders, maOrders,\n", + " componentsNumberARIMA, componentsNamesARIMA, initialArimaNumber,\n", + " xregModel, xregNumber,\n", + " xregParametersEstimated, xregParametersPersistence,\n", + " constantEstimate, constantName, otherParameterEstimate)\n", + "BValues" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "$B\n", + "alpha level \n", + " 0.1 176.4 \n", + "\n", + "$Bl\n", + "[1] 0 -Inf\n", + "\n", + "$Bu\n", + "[1] 1 Inf\n", + "\n" + ] + } + ], + "source": [ + "%%R \n", + "\n", + "BValues" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "$matVt\n", + " [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]\n", + "level 176.4 NA NA NA NA NA NA NA NA NA NA NA NA\n", + " [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]\n", + "level NA NA NA NA NA NA NA NA NA NA NA NA\n", + "\n", + "$matWt\n", + " level\n", + " [1,] 1\n", + " [2,] 1\n", + " [3,] 1\n", + " [4,] 1\n", + " [5,] 1\n", + " [6,] 1\n", + " [7,] 1\n", + " [8,] 1\n", + " [9,] 1\n", + "[10,] 1\n", + "[11,] 1\n", + "[12,] 1\n", + "[13,] 1\n", + "[14,] 1\n", + "[15,] 1\n", + "[16,] 1\n", + "[17,] 1\n", + "[18,] 1\n", + "[19,] 1\n", + "[20,] 1\n", + "[21,] 1\n", + "[22,] 1\n", + "[23,] 1\n", + "[24,] 1\n", + "\n", + "$matF\n", + " [,1]\n", + "[1,] 1\n", + "\n", + "$vecG\n", + " [,1]\n", + "alpha 0.1\n", + "\n", + "$arimaPolynomials\n", + 
"NULL\n", + "\n" + ] + } + ], + "source": [ + "%%R \n", + "\n", + "matVt <- adamCreated$matVt\n", + "matWt <- adamCreated$matWt\n", + "matF <- adamCreated$matF\n", + "vecG <- adamCreated$vecG\n", + "arimaPolynomials <- adamCreated$arimaPolynomials\n", + "B <- BValues$B\n", + "filler <- function(B,\n", + " etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal,\n", + " componentsNumberETS, componentsNumberETSNonSeasonal,\n", + " componentsNumberETSSeasonal, componentsNumberARIMA,\n", + " lags, lagsModel, lagsModelMax,\n", + " # The main matrices\n", + " matVt, matWt, matF, vecG,\n", + " # Persistence and phi\n", + " persistenceEstimate, persistenceLevelEstimate, persistenceTrendEstimate,\n", + " persistenceSeasonalEstimate, persistenceXregEstimate,\n", + " phiEstimate,\n", + " # Initials\n", + " initialType, initialEstimate,\n", + " initialLevelEstimate, initialTrendEstimate, initialSeasonalEstimate,\n", + " initialArimaEstimate, initialXregEstimate,\n", + " # ARIMA\n", + " arimaModel, arEstimate, maEstimate, arOrders, iOrders, maOrders,\n", + " arRequired, maRequired, armaParameters,\n", + " nonZeroARI, nonZeroMA, arimaPolynomials,\n", + " # Explanatory variables\n", + " xregModel, xregNumber,\n", + " xregParametersMissing, xregParametersIncluded,\n", + " xregParametersEstimated, xregParametersPersistence,\n", + " # Constant\n", + " constantEstimate){\n", + "\n", + " j <- 0;\n", + " # Fill in persistence\n", + " if(persistenceEstimate){\n", + " # Persistence of ETS\n", + " if(etsModel){\n", + " i <- 1;\n", + " # alpha\n", + " if(persistenceLevelEstimate){\n", + " j[] <- j+1;\n", + " vecG[i] <- B[j];\n", + " }\n", + " # beta\n", + " if(modelIsTrendy){\n", + " i[] <- 2;\n", + " if(persistenceTrendEstimate){\n", + " j[] <- j+1;\n", + " vecG[i] <- B[j];\n", + " }\n", + " }\n", + " # gamma1, gamma2, ...\n", + " if(modelIsSeasonal){\n", + " if(any(persistenceSeasonalEstimate)){\n", + " vecG[i+which(persistenceSeasonalEstimate)] <- B[j+c(1:sum(persistenceSeasonalEstimate))];\n", + " j[] <- j+sum(persistenceSeasonalEstimate);\n", + " }\n", + " i[] <- componentsNumberETS;\n", + " }\n", + " }\n", + "\n", + " # Persistence of xreg\n", + " if(xregModel && persistenceXregEstimate){\n", + " xregPersistenceNumber <- max(xregParametersPersistence);\n", + " vecG[j+componentsNumberARIMA+1:length(xregParametersPersistence)] <-\n", + " B[j+1:xregPersistenceNumber][xregParametersPersistence];\n", + " j[] <- j+xregPersistenceNumber;\n", + " }\n", + " }\n", + "\n", + " # Damping parameter\n", + " if(etsModel && phiEstimate){\n", + " j[] <- j+1;\n", + " matWt[,2] <- B[j];\n", + " matF[1:2,2] <- B[j];\n", + " }\n", + "\n", + " # ARMA parameters. 
This goes before xreg in persistence\n", + " if(arimaModel){\n", + " # Call the function returning ARI and MA polynomials\n", + " # arimaPolynomials <- polynomialiser(B[j+1:sum(c(arOrders*arEstimate,maOrders*maEstimate))], arOrders, iOrders, maOrders,\n", + " # arRequired, maRequired, arEstimate, maEstimate, armaParameters, lags);\n", + " arimaPolynomials <- lapply(adamPolynomialiser(B[j+1:sum(c(arOrders*arEstimate,maOrders*maEstimate))],\n", + " arOrders, iOrders, maOrders,\n", + " arEstimate, maEstimate, armaParameters, lags), as.vector);\n", + "\n", + " # Fill in the transition matrix\n", + " if(nrow(nonZeroARI)>0){\n", + " matF[componentsNumberETS+nonZeroARI[,2],componentsNumberETS+1:(componentsNumberARIMA+constantRequired)] <-\n", + " -arimaPolynomials$ariPolynomial[nonZeroARI[,1]];\n", + " }\n", + " # Fill in the persistence vector\n", + " if(nrow(nonZeroARI)>0){\n", + " vecG[componentsNumberETS+nonZeroARI[,2]] <- -arimaPolynomials$ariPolynomial[nonZeroARI[,1]];\n", + " }\n", + " if(nrow(nonZeroMA)>0){\n", + " vecG[componentsNumberETS+nonZeroMA[,2]] <- vecG[componentsNumberETS+nonZeroMA[,2]] +\n", + " arimaPolynomials$maPolynomial[nonZeroMA[,1]];\n", + " }\n", + " j[] <- j+sum(c(arOrders*arEstimate,maOrders*maEstimate));\n", + " }\n", + "\n", + " # Initials of ETS if something needs to be estimated\n", + " if(etsModel && all(initialType!=c(\"complete\",\"backcasting\")) && initialEstimate){\n", + " i <- 1;\n", + " if(initialLevelEstimate){\n", + " j[] <- j+1;\n", + " matVt[i,1:lagsModelMax] <- B[j];\n", + " }\n", + " i[] <- i+1;\n", + " if(modelIsTrendy && initialTrendEstimate){\n", + " j[] <- j+1;\n", + " matVt[i,1:lagsModelMax] <- B[j];\n", + " i[] <- i+1;\n", + " }\n", + " if(modelIsSeasonal && any(initialSeasonalEstimate)){\n", + " for(k in 1:componentsNumberETSSeasonal){\n", + " if(initialSeasonalEstimate[k]){\n", + " matVt[componentsNumberETSNonSeasonal+k, 2:lagsModel[componentsNumberETSNonSeasonal+k]-1] <-\n", + " B[j+2:(lagsModel[componentsNumberETSNonSeasonal+k])-1];\n", + " matVt[componentsNumberETSNonSeasonal+k, lagsModel[componentsNumberETSNonSeasonal+k]] <-\n", + " switch(Stype,\n", + " \"A\"=-sum(B[j+2:(lagsModel[componentsNumberETSNonSeasonal+k])-1]),\n", + " \"M\"=1/prod(B[j+2:(lagsModel[componentsNumberETSNonSeasonal+k])-1]));\n", + " j[] <- j+lagsModel[componentsNumberETSNonSeasonal+k]-1;\n", + " }\n", + " }\n", + " }\n", + " }\n", + "\n", + " # Initials of ARIMA\n", + " if(arimaModel){\n", + " if(all(initialType!=c(\"complete\",\"backcasting\")) && initialArimaEstimate){\n", + " # matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber] <- B[j+1:initialArimaNumber];\n", + " # for(i in (componentsNumberARIMA-1):1){\n", + " # indeces <-\n", + " # (1+lagsModelAll[componentsNumberETS+i+1] -\n", + " # lagsModelAll[componentsNumberETS+i]):lagsModelAll[componentsNumberETS+i+1];\n", + " # matVt[componentsNumberETS+i,\n", + " # 1:lagsModelAll[componentsNumberETS+i]] <-\n", + " # (matVt[componentsNumberETS+componentsNumberARIMA, indeces] -\n", + " # # We need a sum of states here...\n", + " # matVt[componentsNumberETS+i+1, 1:lagsModelAll[componentsNumberETS+i]]);\n", + " # }\n", + "\n", + " matVt[componentsNumberETS+nonZeroARI[,2], 1:initialArimaNumber] <-\n", + " switch(Etype,\n", + " \"A\"=arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*% t(B[j+1:initialArimaNumber]),\n", + " \"M\"=exp(arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*% t(log(B[j+1:initialArimaNumber]))));\n", + "\n", + " # switch(Etype,\n", + " # 
\"A\"=arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*% t(B[j+1:initialArimaNumber]) /\n", + " # tail(arimaPolynomials$ariPolynomial,1),\n", + " # \"M\"=exp(arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*% t(log(B[j+1:initialArimaNumber])) /\n", + " # tail(arimaPolynomials$ariPolynomial,1)));\n", + " j[] <- j+initialArimaNumber;\n", + " }\n", + " # This is needed in order to propagate initials of ARIMA to all components\n", + " else if(any(c(arEstimate,maEstimate))){\n", + " # if(nrow(nonZeroARI)>0 && nrow(nonZeroARI)>=nrow(nonZeroMA)){\n", + " # if(nrow(nonZeroARI)>0){\n", + " matVt[componentsNumberETS+nonZeroARI[,2], 1:initialArimaNumber] <-\n", + " switch(Etype,\n", + " \"A\"= arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%\n", + " t(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber]),\n", + " \"M\"=exp(arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%\n", + " t(log(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber]))));\n", + "\n", + " # switch(Etype,\n", + " # \"A\"= arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%\n", + " # t(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber]) /\n", + " # tail(arimaPolynomials$ariPolynomial,1),\n", + " # \"M\"=exp(arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%\n", + " # t(log(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber])) /\n", + " # tail(arimaPolynomials$ariPolynomial,1)));\n", + "\n", + " # }\n", + " # else{\n", + " # matVt[componentsNumberETS+nonZeroMA[,2],\n", + " # 1:initialArimaNumber] <-\n", + " # switch(Etype,\n", + " # \"A\"=arimaPolynomials$maPolynomial[nonZeroMA[,1]] %*%\n", + " # t(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber]) /\n", + " # tail(arimaPolynomials$maPolynomial,1),\n", + " # \"M\"=exp(arimaPolynomials$maPolynomial[nonZeroMA[,1]] %*%\n", + " # t(log(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber])) /\n", + " # tail(arimaPolynomials$maPolynomial,1)));\n", + " # }\n", + " }\n", + " }\n", + "\n", + " # Initials of the xreg\n", + " if(xregModel && (initialType!=\"complete\") && initialEstimate && initialXregEstimate){\n", + " xregNumberToEstimate <- sum(xregParametersEstimated);\n", + " matVt[componentsNumberETS+componentsNumberARIMA+which(xregParametersEstimated==1),\n", + " 1:lagsModelMax] <- B[j+1:xregNumberToEstimate];\n", + " j[] <- j+xregNumberToEstimate;\n", + " # Normalise initials\n", + " for(i in which(xregParametersMissing!=0)){\n", + " matVt[componentsNumberETS+componentsNumberARIMA+i,\n", + " 1:lagsModelMax] <- -sum(matVt[componentsNumberETS+componentsNumberARIMA+\n", + " which(xregParametersIncluded==xregParametersMissing[i]),\n", + " 1:lagsModelMax]);\n", + " }\n", + " }\n", + "\n", + " # Constant\n", + " if(constantEstimate){\n", + " matVt[componentsNumberETS+componentsNumberARIMA+xregNumber+1,] <- B[j+1];\n", + " }\n", + "\n", + " return(list(matVt=matVt, matWt=matWt, matF=matF, vecG=vecG, arimaPolynomials=arimaPolynomials));\n", + " }\n", + "\n", + "adamElements <- filler(B,\n", + " etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal,\n", + " componentsNumberETS, componentsNumberETSNonSeasonal,\n", + " componentsNumberETSSeasonal, componentsNumberARIMA,\n", + " lags, lagsModel, lagsModelMax,\n", + " matVt, matWt, matF, vecG,\n", + " persistenceEstimate, persistenceLevelEstimate, persistenceTrendEstimate,\n", + " persistenceSeasonalEstimate, persistenceXregEstimate,\n", + " phiEstimate,\n", + " initialType, initialEstimate,\n", + " initialLevelEstimate, 
initialTrendEstimate, initialSeasonalEstimate,\n",
+ "                       initialArimaEstimate, initialXregEstimate,\n",
+ "                       arimaModel, arEstimate, maEstimate, arOrders, iOrders, maOrders,\n",
+ "                       arRequired, maRequired, armaParameters,\n",
+ "                       nonZeroARI, nonZeroMA, arimaPolynomials,\n",
+ "                       xregModel, xregNumber,\n",
+ "                       xregParametersMissing, xregParametersIncluded,\n",
+ "                       xregParametersEstimated, xregParametersPersistence, constantEstimate)\n",
+ "adamElements"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# this is where it starts"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model = \"ANN\"\n",
+ "lags = [12]\n",
+ "\n",
+ "multisteps = False,\n",
+ "lb = None,\n",
+ "ub = None,\n",
+ "maxtime = None,\n",
+ "print_level = 1, # 1 or 0\n",
+ "maxeval = None,\n",
+ "h = 12\n",
+ "\n",
+ "\n",
+ "# Assume that the model is not provided\n",
+ "# these will be default arguments\n",
+ "profiles_recent_provided = False\n",
+ "profiles_recent_table = None\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Initial value is not selected. Switching to optimal.\n"
+ ]
+ }
+ ],
+ "source": [
+ "import time\n",
+ "import inspect\n",
+ "\n",
+ "# Start measuring the time of calculations\n",
+ "start_time = time.time()\n",
+ "\n",
+ "# Get the call info\n",
+ "cl = inspect.currentframe()\n",
+ "\n",
+ "# Record the parental environment. Needed for ARIMA initialization\n",
+ "env = cl.f_back\n",
+ "\n",
+ "# Get any additional arguments\n",
+ "ellipsis = {}\n",
+ "\n",
+ "\n",
+ "# here lines 412 to 524\n",
+ "# what happens when we provide the parameters of a model\n",
+ "# I assume for now that a string is provided for the model\n",
+ "\n",
+ "# do the parameter checking\n",
+ "(general, \n",
+ " observations_dict,\n",
+ " persistence_results, \n",
+ " initials_results, \n",
+ " arima_results, \n",
+ " constant_dict, \n",
+ " model_type_dict, \n",
+ " components_dict, \n",
+ " lags_dict, \n",
+ " occurrence_dict, \n",
+ " phi_dict,\n",
+ " explanatory_dict,\n",
+ " params_info) = parameters_checker(ts_df, model=model, lags=lags, h=h)\n",
+ "\n",
+ "# I also assume no auto.adam was provided for now and I take the parameters\n",
+ "# what happens if it's auto.adam?\n",
+ "\n",
+ "# then lines 4033 to 4070 deal with the occurrence model\n",
+ "# this will also have to wait for a bit\n",
+ "\n",
+ "# then I also skip the regression data on lines 4036\n"
+ ]
+ },
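+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Added for illustration (not in the original flow): a quick sanity check of\n",
+ "# what parameters_checker returned. Only keys that this notebook itself uses\n",
+ "# later are shown; treat this as a sketch rather than the full contract.\n",
+ "print(model_type_dict[\"model_do\"])  # 'estimate', since a fixed \"ANN\" was given\n",
+ "print(lags_dict[\"lags\"])             # [1, 12]: lag 1 plus the seasonal lag\n",
+ "print(general[\"h\"])                  # forecast horizon, 12 here\n"
+ ]
+ },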
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Initial parameters: [1.000e-01 1.764e+02]\n",
+ "\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3462: FutureWarning: In a future version, DataFrame.mean(axis=None) will return a scalar mean over the entire DataFrame. To retain the old behavior, use 'frame.mean(axis=0)' or just 'frame.mean()'\n",
+ "  return mean(axis=axis, dtype=dtype, out=out, **kwargs)\n",
+ "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3462: FutureWarning: In a future version, DataFrame.mean(axis=None) will return a scalar mean over the entire DataFrame. To retain the old behavior, use 'frame.mean(axis=0)' or just 'frame.mean()'\n",
+ "  return mean(axis=axis, dtype=dtype, out=out, **kwargs)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Lines 4099 to 4138 -> in case model_do == \"estimate\"\n",
+ "if model_type_dict[\"model_do\"] == \"estimate\":\n",
+ "\n",
+ "    # If this is LASSO/RIDGE with lambda=1, use MSE to estimate initials only\n",
+ "    lambda_original = general['lambda']\n",
+ "    if general['loss'] in [\"LASSO\", \"RIDGE\"] and general['lambda'] == 1:\n",
+ "        if model_type_dict[\"ets_model\"]:\n",
+ "            # Pre-set ETS parameters\n",
+ "            persistence_results[\"persistence_estimate\"] = False\n",
+ "            persistence_results[\"persistence_level_estimate\"] = False\n",
+ "            persistence_results[\"persistence_trend_estimate\"] = False\n",
+ "            persistence_results[\"persistence_seasonal_estimate\"] = [False]\n",
+ "            persistence_results[\"persistence_level\"] = 0\n",
+ "            persistence_results[\"persistence_trend\"] = 0\n",
+ "            persistence_results[\"persistence_seasonal\"] = [0]\n",
+ "            # Phi\n",
+ "            phi_dict[\"phi_estimate\"] = False\n",
+ "            phi_dict[\"phi\"] = 1\n",
+ "\n",
+ "        if model_type_dict[\"xreg_model\"]:\n",
+ "            # ETSX parameters\n",
+ "            persistence_results[\"persistence_xreg_estimate\"] = False\n",
+ "            persistence_results[\"persistence_xreg\"] = 0\n",
+ "\n",
+ "        if model_type_dict[\"arima_model\"]:\n",
+ "            # Pre-set ARMA parameters\n",
+ "            arima_results[\"ar_estimate\"] = [False]\n",
+ "            arima_results[\"ma_estimate\"] = [False]\n",
+ "            arma_parameters = []\n",
+ "            j = 0\n",
+ "            for i, lag in enumerate(lags_dict[\"lags\"]):\n",
+ "                if arima_results[\"ar_orders\"][i] > 0:\n",
+ "                    arma_parameters.extend([1] * arima_results[\"ar_orders\"][i])\n",
+ "                    j += arima_results[\"ar_orders\"][i]\n",
+ "                if arima_results[\"ma_orders\"][i] > 0:\n",
+ "                    arma_parameters.extend([0] * arima_results[\"ma_orders\"][i])\n",
+ "                    j += arima_results[\"ma_orders\"][i]\n",
+ "            arima_results[\"arma_parameters\"] = arma_parameters\n",
+ "\n",
+ "        general['lambda'] = 0\n",
+ "\n",
+ "\n",
+ "    # Why do we estimate first and only then run the architector and creator?\n",
+ "    adam_estimated = estimator(\n",
+ "        general_dict= general,\n",
+ "        model_type_dict = model_type_dict,\n",
+ "        lags_dict = lags_dict,\n",
+ "        observations_dict = observations_dict,\n",
+ "        arima_dict=arima_results,\n",
+ "        constant_dict=constant_dict,\n",
+ "        explanatory_dict=explanatory_dict,\n",
+ "        profiles_recent_table= profiles_recent_table,\n",
+ "        profiles_recent_provided= profiles_recent_provided,\n",
+ "        persistence_dict=persistence_results,\n",
+ "        initials_dict=initials_results,\n",
+ "        occurrence_dict=occurrence_dict,\n",
+ "        phi_dict=phi_dict,\n",
+ "        components_dict=components_dict,\n",
+ "    )\n",
+ "    # A fix for the special case of lambda==1\n",
+ "    lambda_ = general['lambda']\n",
+ "\n",
+ "    # Build the architector\n",
+ "    model_type_dict, components_dict, lags_dict, observations_dict, profile_dict = architector(\n",
+ "        model_type_dict = model_type_dict,\n",
+ "        lags_dict = lags_dict,\n",
+ "        observations_dict = observations_dict,\n",
+ "        arima_checked = arima_results,\n",
+ "        constants_checked = constant_dict,\n",
+ "        explanatory_checked = explanatory_dict,\n",
+ "        profiles_recent_table = profiles_recent_table,\n",
+ "        profiles_recent_provided = profiles_recent_provided\n",
+ "    )\n",
+ "\n",
+ "    # Build the creator\n",
+ "    # Create the matrices for the specific ETS model\n",
+ "    adam_created = creator(\n",
+ "        model_type_dict = model_type_dict,\n",
+ "        lags_dict = lags_dict,\n",
+ "        profiles_dict = profile_dict,\n",
+ "        observations_dict = observations_dict,\n",
+ "\n",
+ "        persistence_checked = persistence_results,\n",
+ "        initials_checked = initials_results,\n",
+ "        arima_checked = arima_results,\n",
+ "        constants_checked = constant_dict,\n",
+ "        phi_dict = phi_dict,\n",
+ "        components_dict = components_dict,\n",
+ "        explanatory_checked = explanatory_dict\n",
+ "    )\n",
+ "\n",
+ "\n",
+ "    # An argument for ic\n",
+ "    ic_selection = ic_function(general['ic'], adam_estimated['log_lik_adam_value'])\n",
+ "\n",
+ "\n",
+ "# Update parameters number\n",
+ "# Why do we need this for now? We will see.\n",
+ "n_param_estimated = adam_estimated['n_param_estimated']\n",
+ "\n",
+ "# Initialize parameters_number in general if not already present\n",
+ "if 'parameters_number' not in general:\n",
+ "    general['parameters_number'] = params_info['parameters_number']\n",
+ "\n",
+ "general['parameters_number'][0][0] = n_param_estimated\n",
+ "\n",
+ "# Handle xreg model case (not currently needed)\n",
+ "# If likelihood loss, scale was estimated\n",
+ "if general['loss'] == 'likelihood':\n",
+ "    if len(general['parameters_number'][0]) <= 3:\n",
+ "        general['parameters_number'][0].append(1)\n",
+ "    else:\n",
+ "        general['parameters_number'][0][3] = 1\n",
+ "\n",
+ "# Calculate row sums\n",
+ "# Add 4th index if it doesn't exist\n",
+ "if len(general['parameters_number'][0]) <= 4:\n",
+ "    general['parameters_number'][0].append(sum(general['parameters_number'][0][0:4]))\n",
+ "    general['parameters_number'][1].append(sum(general['parameters_number'][1][0:4]))\n",
+ "else:\n",
+ "    general['parameters_number'][0][4] = sum(general['parameters_number'][0][0:4])\n",
+ "    general['parameters_number'][1][4] = sum(general['parameters_number'][1][0:4])"
+ ]
+ },
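+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Added note (my reading of the code above; the row semantics mirror the R\n",
+ "# nParam matrix, which is an assumption on my side): general['parameters_number']\n",
+ "# is a two-row table, row 0 for estimated and row 1 for provided parameters.\n",
+ "# The cell above writes n_param_estimated into [0][0], sets [0][3] = 1 when the\n",
+ "# scale is estimated under the likelihood loss, and appends the row sums, e.g.\n",
+ "# [[2, 0, 0], [0, 0, 0]] -> [[2, 0, 0, 1, 3], [0, 0, 0, 0]].\n",
+ "general['parameters_number']\n"
+ ]
+ },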
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'mat_vt': array([[176.4,   nan,   nan,   nan,   nan,   nan,   nan,   nan,   nan,\n",
+ "          nan,   nan,   nan,   nan,   nan,   nan,   nan,   nan,   nan,\n",
+ "          nan,   nan,   nan,   nan,   nan,   nan,   nan]]),\n",
+ " 'mat_wt': array([[1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.],\n",
+ "        [1.]]),\n",
+ " 'mat_f': array([[1.]]),\n",
+ " 'vec_g': array([[0.]]),\n",
+ " 'arima_polynomials': None}"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "adam_created"
+ ]
+ },
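+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# For ETS(ANN) the matrices above reduce to simple exponential smoothing:\n",
+ "# y_t = w' v_{t-1} + e_t,  v_t = F v_{t-1} + g e_t,  with w = F = 1, g = alpha.\n",
+ "# A minimal hand-rolled check of that recursion (my sketch, not library code):\n",
+ "alpha = 0.1  # the starting value of alpha from B above\n",
+ "level = float(ts_df['value'].iloc[0])\n",
+ "for y in ts_df['value'].iloc[1:5]:\n",
+ "    level = level + alpha * (y - level)  # v_t = v_{t-1} + g * e_t\n",
+ "print(level)\n"
+ ]
+ },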
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# lines 4216 if we have \"select\"\n",
+ "#elif model_type_dict[\"model_do\"] == \"select\":\n",
+ "if model_type_dict[\"model_do\"] == \"select\":\n",
+ "    adam_selected = selector( # function not yet fully tested\n",
+ "        model_type_dict,\n",
+ "        phi_dict,\n",
+ "        general,\n",
+ "        lags_dict,\n",
+ "        observations_dict,\n",
+ "        arima_results,\n",
+ "        constant_dict,\n",
+ "        explanatory_dict,\n",
+ "        occurrence_dict,\n",
+ "        components_dict,\n",
+ "        profiles_recent_table,\n",
+ "        profiles_recent_provided,\n",
+ "        persistence_results,\n",
+ "        initials_results,\n",
+ "\n",
+ "        criterion = \"AICc\",\n",
+ "        silent = False\n",
+ "    )\n",
+ "\n",
+ "    ic_selection = adam_selected['ic_selection']\n",
+ "    results = adam_selected['results']\n",
+ "\n",
+ "    best_model = min(ic_selection.items(), key=lambda x: x[1])[0]\n",
+ "    best_id = next(i for i, result in enumerate(results) if result['model'] == best_model)\n",
+ "\n",
+ "    model_type_dict = results[best_id]['model_type_dict']\n",
+ "    phi_dict = results[best_id]['phi_dict']\n",
+ "    adam_estimated = results[best_id]['adam_estimated']\n",
+ "\n",
+ "    # Build the architector\n",
+ "    model_type_dict, components_dict, lags_dict, observations_dict, profile_dict = architector(\n",
+ "        model_type_dict = model_type_dict,\n",
+ "        lags_dict = lags_dict,\n",
+ "        observations_dict = observations_dict,\n",
+ "        arima_checked = arima_results,\n",
+ "        constants_checked = constant_dict,\n",
+ "        explanatory_checked = explanatory_dict,\n",
+ "        profiles_recent_table = profiles_recent_table,\n",
+ "        profiles_recent_provided = profiles_recent_provided\n",
+ "    )\n",
+ "\n",
+ "    # Build the creator\n",
+ "    # Create the matrices for the specific ETS model\n",
+ "    adam_created = creator(\n",
+ "        model_type_dict = model_type_dict,\n",
+ "        lags_dict = lags_dict,\n",
+ "        profiles_dict = profile_dict,\n",
+ "        observations_dict = observations_dict,\n",
+ "\n",
+ "        persistence_checked = persistence_results,\n",
+ "        initials_checked = initials_results,\n",
+ "        arima_checked = arima_results,\n",
+ "        constants_checked = constant_dict,\n",
+ "        phi_dict = phi_dict,\n",
+ "        components_dict = components_dict,\n",
+ "        explanatory_checked = explanatory_dict\n",
+ "    )\n",
+ "\n",
+ "\n",
+ "    # Update parameters number\n",
+ "    # Why do we need this for now? We will see.\n",
+ "    n_param_estimated = adam_estimated['n_param_estimated']\n",
+ "    general['parameters_number'] = params_info['parameters_number']\n",
+ "\n",
+ "    general['parameters_number'][0][0] = n_param_estimated\n",
+ "\n",
+ "    # Handle xreg model case (not currently needed)\n",
+ "    # If likelihood loss, scale was estimated\n",
+ "    if general['loss'] == 'likelihood':\n",
+ "        if len(general['parameters_number'][0]) <= 3:\n",
+ "            general['parameters_number'][0].append(1)\n",
+ "        else:\n",
+ "            general['parameters_number'][0][3] = 1\n",
+ "\n",
+ "    # Calculate row sums\n",
+ "    # Add 4th index if it doesn't exist\n",
+ "    if len(general['parameters_number'][0]) <= 4:\n",
+ "        general['parameters_number'][0].append(sum(general['parameters_number'][0][0:4]))\n",
+ "        general['parameters_number'][1].append(sum(general['parameters_number'][1][0:4]))\n",
+ "    else:\n",
+ "        general['parameters_number'][0][4] = sum(general['parameters_number'][0][0:4])\n",
+ "        general['parameters_number'][1][4] = sum(general['parameters_number'][1][0:4])"
+ ]
+ },
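+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The 'combine' branch below weights the pool members by Akaike weights,\n",
+ "# w_i = exp(-0.5 * (IC_i - IC_best)) / sum_j exp(-0.5 * (IC_j - IC_best)).\n",
+ "# A tiny standalone example of that weighting (the IC values are made up):\n",
+ "import numpy as np\n",
+ "ics = {'ANN': 100.0, 'AAN': 103.2}\n",
+ "ic_best_demo = min(ics.values())\n",
+ "w = {m: np.exp(-0.5 * (ic - ic_best_demo)) for m, ic in ics.items()}\n",
+ "print({m: v / sum(w.values()) for m, v in w.items()})\n"
+ ]
+ },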
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# if model combine\n",
+ "# lines 4290\n",
+ "\n",
+ "# elif general['model_do'] == 'combine':\n",
+ "if model_type_dict['model_do'] == 'combine':\n",
+ "    model_original = model_type_dict\n",
+ "    # If models pool is not provided, create one\n",
+ "    if general['models_pool'] is None:\n",
+ "        # Define the whole pool of errors\n",
+ "        if not model_type_dict['allow_multiplicative']:\n",
+ "            pool_errors = ['A']\n",
+ "            pool_trends = ['N', 'A', 'Ad']\n",
+ "            pool_seasonals = ['N', 'A']\n",
+ "        else:\n",
+ "            pool_errors = ['A', 'M']\n",
+ "            pool_trends = ['N', 'A', 'Ad', 'M', 'Md']\n",
+ "            pool_seasonals = ['N', 'A', 'M']\n",
+ "\n",
+ "        # Some preparation variables\n",
+ "        # If error_type is not Z, check on additive errors\n",
+ "        if model_type_dict['error_type'] != 'Z':\n",
+ "            if model_type_dict['error_type'] == 'N':\n",
+ "                pool_errors = ['N']\n",
+ "            elif model_type_dict['error_type'] in ['A', 'X']:\n",
+ "                pool_errors = ['A']\n",
+ "            elif model_type_dict['error_type'] in ['M', 'Y']:\n",
+ "                pool_errors = ['M']\n",
+ "\n",
+ "        # If trend_type is not Z, create pool with specified type\n",
+ "        if model_type_dict['trend_type'] != 'Z':\n",
+ "            if model_type_dict['trend_type'] == 'N':\n",
+ "                pool_trends = ['N']\n",
+ "            elif model_type_dict['trend_type'] == 'A':\n",
+ "                pool_trends = ['Ad' if model_type_dict['damped'] else 'A']\n",
+ "            elif model_type_dict['trend_type'] == 'M':\n",
+ "                pool_trends = ['Md' if model_type_dict['damped'] else 'M']\n",
+ "            elif model_type_dict['trend_type'] == 'X':\n",
+ "                pool_trends = ['N', 'A', 'Ad']\n",
+ "            elif model_type_dict['trend_type'] == 'Y':\n",
+ "                pool_trends = ['N', 'M', 'Md']\n",
+ "\n",
+ "        # If season_type is not Z, create specific pools\n",
+ "        if model_type_dict['season_type'] != 'Z':\n",
+ "            if model_type_dict['season_type'] == 'N':\n",
+ "                pool_seasonals = ['N']\n",
+ "            elif model_type_dict['season_type'] == 'A':\n",
+ "                pool_seasonals = ['A']\n",
+ "            elif model_type_dict['season_type'] == 'X':\n",
+ "                pool_seasonals = ['N', 'A']\n",
+ "            elif model_type_dict['season_type'] == 'M':\n",
+ "                pool_seasonals = ['M']\n",
+ "            elif model_type_dict['season_type'] == 'Y':\n",
+ "                pool_seasonals = ['N', 'M']\n",
+ "\n",
+ "        # Create models pool by combining all possibilities\n",
+ "        general['models_pool'] = [e + t + s for e in pool_errors\n",
+ "                                  for t in pool_trends\n",
+ "                                  for s in pool_seasonals]\n",
+ "\n",
+ "    # run the selector\n",
+ "    adam_selected = selector( # function not yet fully tested\n",
+ "        model_type_dict,\n",
+ "        phi_dict,\n",
+ "        general,\n",
+ "        lags_dict,\n",
+ "        observations_dict,\n",
+ "        arima_results,\n",
+ "        constant_dict,\n",
+ "        explanatory_dict,\n",
+ "        occurrence_dict,\n",
+ "        components_dict,\n",
+ "        profiles_recent_table,\n",
+ "        profiles_recent_provided,\n",
+ "        persistence_results,\n",
+ "        initials_results,\n",
+ "\n",
+ "        criterion = \"AICc\",\n",
+ "        silent = False\n",
+ "    )\n",
+ "\n",
+ "    ic_selection = adam_selected['ic_selection']\n",
+ "    results = adam_selected['results']\n",
+ "\n",
+ "    ic_best = min(ic_selection.values())\n",
+ "    # Calculate ic weights based on ic_best\n",
+ "    ic_weights = {model: np.exp(-0.5 * (ic - ic_best)) for model, ic in ic_selection.items()}\n",
+ "    weights_sum = sum(ic_weights.values())\n",
+ "    ic_weights = {model: weight/weights_sum for model, weight in ic_weights.items()}\n",
+ "\n",
+ "    # Set very small weights to 0 as a failsafe\n",
+ "    ic_weights = {model: 0 if weight < 1e-5 else weight for model, weight in ic_weights.items()}\n",
+ "    weights_sum = sum(ic_weights.values())\n",
+ "    ic_weights = {model: weight/weights_sum for model, weight in ic_weights.items()}\n",
+ "\n",
+ "    # Add weights to adam_selected dictionary\n",
+ "    adam_selected['ic_weights'] = ic_weights\n",
+ "\n",
+ "    # Iterate through all results\n",
+ "    for i in range(len(adam_selected['results'])):\n",
+ "        # Get current result\n",
+ "        result = adam_selected['results'][i]\n",
+ "\n",
+ "        model_type_dict, components_dict, lags_dict, observations_dict, profile_dict = architector(\n",
+ "            model_type_dict = result['model_type_dict'],\n",
+ "            lags_dict = lags_dict,\n",
+ "            observations_dict = observations_dict,\n",
+ "            arima_checked = arima_results,\n",
+ "            constants_checked = constant_dict,\n",
+ "            
explanatory_checked = explanatory_dict,\n", + " profiles_recent_table = profiles_recent_table,\n", + " profiles_recent_provided = profiles_recent_provided\n", + " )\n", + " # add to adam_selected\n", + " adam_selected['results'][i]['model_type_dict'] = model_type_dict\n", + " adam_selected['results'][i]['components_dict'] = components_dict\n", + " adam_selected['results'][i]['lags_dict'] = lags_dict\n", + " adam_selected['results'][i]['observations_dict'] = observations_dict\n", + " adam_selected['results'][i]['profile_dict'] = profile_dict\n", + "\n", + " #creator\n", + " adam_created = creator(\n", + " model_type_dict = model_type_dict,\n", + " lags_dict = lags_dict,\n", + " profiles_dict = profile_dict,\n", + " observations_dict = observations_dict,\n", + "\n", + " persistence_checked = persistence_results,\n", + " initials_checked = initials_results,\n", + " arima_checked = arima_results,\n", + " constants_checked = constant_dict,\n", + " phi_dict = phi_dict,\n", + " components_dict = components_dict,\n", + " explanatory_checked = explanatory_dict\n", + " )\n", + "\n", + " # add to adam_selected\n", + " adam_selected['results'][i]['adam_created'] = adam_created\n", + "\n", + " # update parameters number\n", + " n_param_estimated = result['adam_estimated']['n_param_estimated']\n", + " general['parameters_number'] = params_info['parameters_number']\n", + "\n", + " general['parameters_number'][0][0] = n_param_estimated\n", + "\n", + " # Handle xreg model case (not needed currently )\n", + " # If likelihood loss, scale was estimated\n", + " if general['loss'] == 'likelihood':\n", + " if len(general['parameters_number'][0]) <= 3:\n", + " general['parameters_number'][0].append(1)\n", + " else:\n", + " general['parameters_number'][0][3] = 1\n", + "\n", + " # Calculate row sums\n", + " # Add 4th index if it doesn't exist\n", + " if len(general['parameters_number'][0]) <= 4:\n", + " general['parameters_number'][0].append(sum(general['parameters_number'][0][0:4]))\n", + " general['parameters_number'][1].append(sum(general['parameters_number'][1][0:4]))\n", + " else:\n", + " general['parameters_number'][0][4] = sum(general['parameters_number'][0][0:4])\n", + " general['parameters_number'][1][4] = sum(general['parameters_number'][1][0:4])\n", + "\n", + " # add to adam_selected\n", + " adam_selected['results'][i]['parameters_number'] = general['parameters_number']\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "# line 4445 if model use\n", + "# i will skip this for now\n", + "\n", + "# elif model_type_dict['model_do'] == 'use':\n", + "\n", + "# If the distribution is default, change it according to the error term\n", + "if general['distribution'] == \"default\":\n", + " if general['loss'] == \"likelihood\":\n", + " if model_type_dict['error_type'] == \"A\":\n", + " general['distribution_new'] = \"dnorm\"\n", + " elif model_type_dict['error_type'] == \"M\":\n", + " general['distribution_new'] = \"dgamma\"\n", + " elif general['loss'] in [\"MAEh\", \"MACE\", \"MAE\"]:\n", + " general['distribution_new'] = \"dlaplace\"\n", + " elif general['loss'] in [\"HAMh\", \"CHAM\", \"HAM\"]:\n", + " general['distribution_new'] = \"ds\"\n", + " elif general['loss'] in [\"MSEh\", \"MSCE\", \"MSE\", \"GPL\"]:\n", + " general['distribution_new'] = \"dnorm\"\n", + "else:\n", + " general['distribution_new'] = general['distribution']\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ 
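+ "# Added note: the conversion below assumes that y_start, frequency and\n",
+ "# y_forecast_start in observations_dict carry enough information to rebuild\n",
+ "# a DatetimeIndex, e.g. pd.date_range(start='2023-01-31', periods=24, freq='M').\n",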
+ "# Transform everything into appropriate classes\n", + "\n", + "# I assume we have either numpy or pandas series\n", + "if isinstance(observations_dict['y_in_sample'], np.ndarray):\n", + " y_in_sample = pd.Series(observations_dict['y_in_sample'], index=pd.date_range(start=observations_dict['y_start'], periods=len(observations_dict['y_in_sample']), freq=observations_dict['frequency']))\n", + " if general['holdout']:\n", + " y_holdout = pd.Series(observations_dict['y_holdout'], \n", + " index=pd.date_range(start=observations_dict['y_forecast_start'], \n", + " periods=len(observations_dict['y_holdout']), \n", + " freq=observations_dict['frequency']))\n", + "else:\n", + " y_in_sample = observations_dict['y_in_sample'].copy()\n", + " \n", + " if general['holdout']:\n", + " y_holdout = pd.Series(observations_dict['y_holdout'], \n", + " index=observations_dict['y_forecast_index'])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'B': array([1.000e-01, 1.764e+02]),\n", + " 'CF_value': 1044.3125265799933,\n", + " 'n_param_estimated': 2,\n", + " 'log_lik_adam_value': {'value': -1044.3125265799933, 'nobs': 24, 'df': 3},\n", + " 'arima_polynomials': None}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Getting the preparator \n", + "adam_estimated" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "observations_dict['frequency']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = \"ANN\"\n", + "lags = [12]\n", + "\n", + "multisteps = False,\n", + "lb = None,\n", + "ub = None,\n", + "maxtime = None,\n", + "print_level = 1, # 1 or 0\n", + "maxeval = None,\n", + "h = 12\n", + "\n", + "\n", + "# Assume that the model is not provided\n", + "# these will be default arguments\n", + "profiles_recent_provided = False\n", + "profiles_recent_table = None\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'lags': [1, 12],\n", + " 'lags_model': [[1]],\n", + " 'lags_model_seasonal': [12],\n", + " 'lags_model_arima': [],\n", + " 'lags_model_all': [[1]],\n", + " 'max_lag': 12,\n", + " 'lags_length': 2,\n", + " 'lags_model_max': 1}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lags_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "out = preparator(model_type_dict,\n", + " \n", + " # Components info\n", + " components_dict,\n", + " \n", + " # Lags info\n", + " lags_dict,\n", + " \n", + " # Matrices from creator\n", + " adam_created,\n", + " \n", + " # Parameter dictionaries\n", + " persistence_results,\n", + " initials_results,\n", + " arima_results,\n", + " explanatory_dict,\n", + " phi_dict,\n", + " constant_dict,\n", + " \n", + " # Other parameters\n", + " observations_dict,\n", + " occurrence_dict,\n", + " general,\n", + " profile_dict,\n", + " \n", + " # The parameter vector\n", + " adam_estimated,\n", + " \n", + " # Optional parameters\n", + " bounds=\"usual\",\n", + " other=None\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/plain": [ + "2025-01-31 14.991034\n", + "2025-02-28 14.991034\n", + "2025-03-31 14.991034\n", + "2025-04-30 14.991034\n", + "2025-05-31 14.991034\n", + "2025-06-30 14.991034\n", + "2025-07-31 14.991034\n", + "2025-08-31 14.991034\n", + "2025-09-30 14.991034\n", + "2025-10-31 14.991034\n", + "2025-11-30 14.991034\n", + "2025-12-31 14.991034\n", + "Freq: M, dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out['forecast']" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2025-01-31 14.991034\n", + "2025-02-28 14.991034\n", + "2025-03-31 14.991034\n", + "2025-04-30 14.991034\n", + "2025-05-31 14.991034\n", + "2025-06-30 14.991034\n", + "2025-07-31 14.991034\n", + "2025-08-31 14.991034\n", + "2025-09-30 14.991034\n", + "2025-10-31 14.991034\n", + "2025-11-30 14.991034\n", + "2025-12-31 14.991034\n", + "Freq: M, dtype: float64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out['forecast']" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "matrices_dict = filler(\n", + " adam_estimated['B'],\n", + " model_type_dict = model_type_dict,\n", + " components_dict = components_dict,\n", + " lags_dict = lags_dict,\n", + " matrices_dict = adam_created,\n", + " persistence_checked = persistence_results,\n", + " initials_checked = initials_results,\n", + " arima_checked = arima_results,\n", + " explanatory_checked = explanatory_dict,\n", + " phi_dict = phi_dict,\n", + " constants_checked = constant_dict\n", + " )\n", + "# Write down phi\n", + "if phi_dict[\"phi_estimate\"]:\n", + " phi_dict[\"phi\"] = adam_estimated['B'][next(i for i,v in enumerate(B.keys()) if v==\"phi\")]\n", + "\n", + "# Write down the initials in the recent profile\n", + "profile_dict[\"profiles_recent_table\"][:] = matrices_dict['mat_vt'][:, :lags_dict[\"lags_model_max\"]]\n", + "profile_dict[\"profiles_recent_initial\"] = matrices_dict['mat_vt'][:, :lags_dict[\"lags_model_max\"]].copy()\n", + "adam_fitted = adam_fitter(\n", + " matrices_dict['mat_vt'], matrices_dict['mat_wt'], matrices_dict['mat_f'], matrices_dict['vec_g'],\n", + " lags_dict['lags_model_all'], profile_dict['index_lookup_table'], profile_dict['profiles_recent_table'],\n", + " model_type_dict[\"error_type\"], model_type_dict[\"trend_type\"], model_type_dict[\"season_type\"], \n", + " components_dict[\"components_number_ets\"], components_dict[\"components_number_ets_seasonal\"],\n", + " components_dict.get(\"components_number_arima\", 0), explanatory_dict[\"xreg_number\"], \n", + " constant_dict[\"constant_required\"],\n", + " observations_dict[\"y_in_sample\"], observations_dict[\"ot\"], \n", + " any(x in initials_results[\"initial_type\"] for x in [\"complete\", \"backcasting\"])\n", + ")\n", + "matrices_dict['mat_vt'][:] = adam_fitted[\"matVt\"]\n", + "profile_dict[\"profiles_recent_table\"] = adam_fitted[\"profile\"]\n", + "\n", + "# Make sure that there are no negative values in multiplicative components\n", + "# This might appear in case of bounds=\"a\"\n", + "if model_type_dict[\"trend_type\"] == \"M\" and (np.any(np.isnan(matrices_dict['mat_vt'][1,:])) or np.any(matrices_dict['mat_vt'][1,:] <= 0)):\n", + " i = np.where(matrices_dict['mat_vt'][1,:] <= 0)[0]\n", + " matrices_dict['mat_vt'][1,i] = 1e-6\n", + " profile_dict[\"profiles_recent_table\"][1,i] = 
1e-6\n", + "\n", + "if model_type_dict[\"season_type\"] == \"M\" and np.all(~np.isnan(matrices_dict['mat_vt'][components_dict[\"components_number_ets_non_seasonal\"]:components_dict[\"components_number_ets_non_seasonal\"]+components_dict[\"components_number_ets_seasonal\"],:])) and \\\n", + " np.any(matrices_dict['mat_vt'][components_dict[\"components_number_ets_non_seasonal\"]:components_dict[\"components_number_ets_non_seasonal\"]+components_dict[\"components_number_ets_seasonal\"],:] <= 0):\n", + " i = np.where(matrices_dict['mat_vt'][components_dict[\"components_number_ets_non_seasonal\"]:components_dict[\"components_number_ets_non_seasonal\"]+components_dict[\"components_number_ets_seasonal\"],:] <= 0)[0]\n", + " matrices_dict['mat_vt'][components_dict[\"components_number_ets_non_seasonal\"]:components_dict[\"components_number_ets_non_seasonal\"]+components_dict[\"components_number_ets_seasonal\"],i] = 1e-6\n", + " i = np.where(profile_dict[\"profiles_recent_table\"][components_dict[\"components_number_ets_non_seasonal\"]:components_dict[\"components_number_ets_non_seasonal\"]+components_dict[\"components_number_ets_seasonal\"],:] <= 0)[0]\n", + " profile_dict[\"profiles_recent_table\"][components_dict[\"components_number_ets_non_seasonal\"]:components_dict[\"components_number_ets_non_seasonal\"]+components_dict[\"components_number_ets_seasonal\"],i] = 1e-6\n", + "\n", + "# Prepare fitted and error with ts / zoo\n", + "if not isinstance(observations_dict[\"y_in_sample\"], pd.Series):\n", + " y_fitted = pd.Series(np.full(observations_dict[\"obs_in_sample\"], np.nan), \n", + " index=pd.date_range(start=observations_dict[\"y_start\"], \n", + " periods=observations_dict[\"obs_in_sample\"], \n", + " freq=observations_dict[\"frequency\"]))\n", + " errors = pd.Series(np.full(observations_dict[\"obs_in_sample\"], np.nan), \n", + " index=pd.date_range(start=observations_dict[\"y_start\"], \n", + " periods=observations_dict[\"obs_in_sample\"], \n", + " freq=observations_dict[\"frequency\"]))\n", + "else:\n", + " y_fitted = pd.Series(np.full(observations_dict[\"obs_in_sample\"], np.nan), index=observations_dict[\"y_in_sample_index\"])\n", + " errors = pd.Series(np.full(observations_dict[\"obs_in_sample\"], np.nan), index=observations_dict[\"y_in_sample_index\"])\n", + "\n", + "errors[:] = adam_fitted[\"errors\"].flatten()\n", + "y_fitted[:] = adam_fitted[\"yFitted\"].flatten()\n", + "\n", + " # Check what was returned in the end\n", + "if np.any(np.isnan(y_fitted)) or np.any(pd.isna(y_fitted)):\n", + " warnings.warn(\"Something went wrong in the estimation of the model and NaNs were produced. 
\"\n", + " \"If this is a mixed model, consider using the pure ones instead.\")\n", + "\n", + "if occurrence_dict[\"occurrence_model\"]:\n", + " y_fitted[:] = y_fitted * occurrence_dict[\"p_fitted\"]\n", + "\n", + "# Fix the cases, when we have zeroes in the provided occurrence\n", + "if occurrence_dict[\"occurrence\"] == \"provided\":\n", + " y_fitted[~occurrence_dict[\"ot_logical\"]] = y_fitted[~occurrence_dict[\"ot_logical\"]] * occurrence_dict[\"p_fitted\"][~occurrence_dict[\"ot_logical\"]]\n", + "\n", + "# Produce forecasts if the horizon is non-zero\n", + "if general[\"h\"] > 0:\n", + " if not isinstance(observations_dict.get(\"y_in_sample\"), pd.Series):\n", + " y_forecast = pd.Series(np.full(general[\"h\"], np.nan), \n", + " index=pd.date_range(start=observations_dict[\"y_forecast_start\"], \n", + " periods=general[\"h\"], \n", + " freq=observations_dict[\"frequency\"]))\n", + " else:\n", + " y_forecast = pd.Series(np.full(general[\"h\"], np.nan), \n", + " index=observations_dict[\"y_forecast_index\"])\n", + "y_forecast[:] = adam_forecaster(\n", + " matrixWt=matrices_dict['mat_wt'][-general[\"h\"]:],\n", + " matrixF=matrices_dict['mat_f'],\n", + " lags=lags_dict[\"lags_model_all\"],\n", + " indexLookupTable=profile_dict[\"index_lookup_table\"],\n", + " profilesRecent=profile_dict[\"profiles_recent_table\"],\n", + " E=model_type_dict[\"error_type\"],\n", + " T=model_type_dict[\"trend_type\"],\n", + " S=model_type_dict[\"season_type\"],\n", + " nNonSeasonal=components_dict[\"components_number_ets\"],\n", + " nSeasonal=components_dict[\"components_number_ets_seasonal\"],\n", + " nArima=components_dict.get(\"components_number_arima\", 0),\n", + " nXreg=explanatory_dict[\"xreg_number\"],\n", + " constant=constant_dict[\"constant_required\"],\n", + " horizon=general[\"h\"]\n", + " ).flatten()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2025-01-31 14.991034\n", + "2025-02-28 14.991034\n", + "2025-03-31 14.991034\n", + "2025-04-30 14.991034\n", + "2025-05-31 14.991034\n", + "2025-06-30 14.991034\n", + "2025-07-31 14.991034\n", + "2025-08-31 14.991034\n", + "2025-09-30 14.991034\n", + "2025-10-31 14.991034\n", + "2025-11-30 14.991034\n", + "2025-12-31 14.991034\n", + "Freq: M, dtype: float64" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "y_forecast" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'B': array([1.000e-01, 1.764e+02]),\n", + " 'CF_value': 1044.3125265799933,\n", + " 'n_param_estimated': 2,\n", + " 'log_lik_adam_value': {'value': -1044.3125265799933, 'nobs': 24, 'df': 3},\n", + " 'arima_polynomials': None}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adam_estimated" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'B': array([1.000e-01, 1.764e+02]),\n", + " 'CF_value': 1044.3125265799933,\n", + " 'n_param_estimated': 2,\n", + " 'log_lik_adam_value': {'value': -1044.3125265799933, 'nobs': 24, 'df': 3},\n", + " 'arima_polynomials': None}" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adam_estimated" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "name": "stderr", 
+ "output_type": "stream", + "text": [ + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3462: FutureWarning: In a future version, DataFrame.mean(axis=None) will return a scalar mean over the entire DataFrame. To retain the old behavior, use 'frame.mean(axis=0)' or just 'frame.mean()'\n", + " return mean(axis=axis, dtype=dtype, out=out, **kwargs)\n" + ] + } + ], + "source": [ + "model_type_dict, components_dict, lags_dict, observations_dict, profile_dict = architector(\n", + " model_type_dict = model_type_dict,\n", + " lags_dict = lags_dict,\n", + " observations_dict = observations_dict,\n", + " arima_checked = arima_results,\n", + " constants_checked = constant_dict,\n", + " explanatory_checked = explanatory_dict,\n", + " profiles_recent_table = profiles_recent_table,\n", + " profiles_recent_provided = profiles_recent_provided\n", + ")\n", + "\n", + "# Create the matrices for the specific ETS model\n", + "adam_created = creator(\n", + " model_type_dict = model_type_dict,\n", + " lags_dict = lags_dict,\n", + " profiles_dict = profile_dict,\n", + " observations_dict = observations_dict,\n", + "\n", + " persistence_checked = persistence_results,\n", + " initials_checked = initials_results,\n", + " arima_checked = arima_results,\n", + " constants_checked = constant_dict,\n", + " phi_dict = phi_dict,\n", + " components_dict = components_dict,\n", + " explanatory_checked = explanatory_dict\n", + ")\n", + "\n", + "# Initialize B\n", + "b_values = initialiser(\n", + " model_type_dict = model_type_dict,\n", + " components_dict = components_dict,\n", + " lags_dict = lags_dict,\n", + " adam_created = adam_created,\n", + " persistence_checked = persistence_results,\n", + " initials_checked = initials_results,\n", + " arima_checked = arima_results,\n", + " constants_checked = constant_dict,\n", + " explanatory_checked = explanatory_dict,\n", + " observations_dict = observations_dict,\n", + " bounds = general['bounds'],\n", + " phi_dict = phi_dict,\n", + ")\n", + "# Create the vector of initials for the optimisation\n", + "#if B is None:\n", + "B = b_values['B']\n", + "#if lb is None:\n", + "lb = b_values['Bl']\n", + "#if ub is None:\n", + "ub = b_values['Bu']\n", + "\n", + "\n", + "# Companion matrices for the polynomials calculation -> stationarity / stability checks\n", + "if model_type_dict['arima_model']:\n", + " # AR polynomials\n", + " ar_polynomial_matrix = np.zeros((np.sum(arima_results['ar_orders']) * lags_dict['lags'], np.sum(arima_results['ar_orders']) * lags_dict['lags']))\n", + " if ar_polynomial_matrix.shape[0] > 1:\n", + " ar_polynomial_matrix[1:, :-1] = np.eye(ar_polynomial_matrix.shape[0] - 1)\n", + " # MA polynomials\n", + " ma_polynomial_matrix = np.zeros((np.sum(arima_results['ma_orders']) * lags_dict['lags'], np.sum(arima_results['ma_orders']) * lags_dict['lags']))\n", + " if ma_polynomial_matrix.shape[0] > 1:\n", + " ma_polynomial_matrix[1:, :-1] = np.eye(ma_polynomial_matrix.shape[0] - 1)\n", + "else:\n", + " ma_polynomial_matrix = ar_polynomial_matrix = None\n", + "\n", + "\n", + "if general['distribution'] == \"default\":\n", + " if general['loss'] == \"likelihood\":\n", + " general['distribution_new'] = \"dnorm\" if model_type_dict['error_type'] == \"A\" else \"dgamma\"\n", + " elif general['loss'] in [\"MAEh\", \"MACE\", \"MAE\"]:\n", + " general['distribution_new'] = \"dlaplace\"\n", + " elif general['loss'] in [\"HAMh\", \"CHAM\", \"HAM\"]:\n", + " general['distribution_new'] = \"ds\"\n", + " else:\n", + " general['distribution_new'] = 
\"dnorm\"\n", + "else:\n", + " general['distribution_new'] = general['distribution']\n", + "\n", + "# Handle LASSO/RIDGE denominator calculation\n", + "if general['loss'] in [\"LASSO\", \"RIDGE\"]:\n", + " if explanatory_dict['xreg_number'] > 0:\n", + " # Calculate standard deviation for each column of matWt\n", + " general['denominator'] = np.std(adam_created['mat_wt'], axis=0)\n", + " # Replace infinite values with 1\n", + " general['denominator'][np.isinf(general['denominator'])] = 1\n", + " else:\n", + " general['denominator'] = None\n", + " # Calculate denominator for y values\n", + " general['y_denominator'] = max(np.std(np.diff(observations_dict['y_in_sample'])), 1)\n", + "else:\n", + " general['denominator'] = None\n", + " general['y_denominator'] = None\n", + "\n", + "general['multisteps'] = multisteps\n" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "ename": "UnboundLocalError", + "evalue": "local variable 'CFValue' referenced before assignment", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mUnboundLocalError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[98], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mCF\u001b[49m\u001b[43m(\u001b[49m\u001b[43mB\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_type_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mcomponents_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mlags_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43madam_created\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mpersistence_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43minitials_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43marima_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mexplanatory_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mphi_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mconstant_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mobservations_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mprofile_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[43mgeneral\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mbounds\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[10], line 248\u001b[0m, in \u001b[0;36mCF\u001b[0;34m(B, model_type_dict, components_dict, lags_dict, matrices_dict, persistence_checked, initials_checked, arima_checked, explanatory_checked, phi_dict, constants_checked, observations_dict, profile_dict, general, bounds, other, otherParameterEstimate, arPolynomialMatrix, maPolynomialMatrix, regressors)\u001b[0m\n\u001b[1;32m 232\u001b[0m CFValue \u001b[38;5;241m=\u001b[39m 
general[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mloss_function\u001b[39m\u001b[38;5;124m'\u001b[39m](actual\u001b[38;5;241m=\u001b[39mobservations_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124my_in_sample\u001b[39m\u001b[38;5;124m'\u001b[39m], \n\u001b[1;32m 233\u001b[0m fitted\u001b[38;5;241m=\u001b[39madam_fitted[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124myFitted\u001b[39m\u001b[38;5;124m'\u001b[39m], \n\u001b[1;32m 234\u001b[0m B\u001b[38;5;241m=\u001b[39mB)\n\u001b[1;32m 235\u001b[0m \u001b[38;5;66;03m#else:\u001b[39;00m\n\u001b[1;32m 236\u001b[0m \u001b[38;5;66;03m# currently no multistep loss function\u001b[39;00m\n\u001b[1;32m 237\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 245\u001b[0m \n\u001b[1;32m 246\u001b[0m \u001b[38;5;66;03m#CFValue = calculate_multistep_loss(general['loss'], adamErrors, observations_dict['obs_in_sample'], general['horizon'])\u001b[39;00m\n\u001b[0;32m--> 248\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m np\u001b[38;5;241m.\u001b[39misnan(\u001b[43mCFValue\u001b[49m):\n\u001b[1;32m 249\u001b[0m CFValue \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1e300\u001b[39m\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m CFValue\n", + "\u001b[0;31mUnboundLocalError\u001b[0m: local variable 'CFValue' referenced before assignment" + ] + } + ], + "source": [ + "CF(B, model_type_dict,\n", + " components_dict,\n", + " lags_dict,\n", + " adam_created,\n", + " persistence_results,\n", + " initials_results,\n", + " arima_results,\n", + " explanatory_dict,\n", + " phi_dict,\n", + " constant_dict,\n", + " observations_dict,\n", + " profile_dict,\n", + " general,\n", + " \n", + " bounds = None)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "# Create nlopt optimizer object\n", + "opt = nlopt.opt(nlopt.LD_SLSQP, len(B)) # Use SLSQP algorithm to match R code\n", + "\n", + "# Set bounds\n", + "opt.set_lower_bounds(lb)\n", + "opt.set_upper_bounds(ub)\n", + "opt.set_xtol_rel(1e-6) # Relative tolerance on optimization parameters\n", + "opt.set_ftol_rel(1e-6) # Relative tolerance on function value\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'profiles_recent_table': array([[1.]]),\n", + " 'profiles_recent_provided': False,\n", + " 'index_lookup_table': array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0]])}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "profile_dict" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "# Work on the filler here\n", + "adamElements = filler(B,\n", + " model_type_dict,\n", + " components_dict,\n", + " lags_dict,\n", + " adam_created,\n", + " persistence_results,\n", + " initials_results,\n", + " arima_results,\n", + " explanatory_dict,\n", + " phi_dict,\n", + " constant_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'profiles_recent_table'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[109], line 21\u001b[0m\n\u001b[1;32m 
17\u001b[0m \u001b[38;5;66;03m# Set objective function\u001b[39;00m\n\u001b[1;32m 18\u001b[0m opt\u001b[38;5;241m.\u001b[39mset_min_objective(objective_wrapper)\n\u001b[0;32m---> 21\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[43mopt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptimize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mB\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 22\u001b[0m res_fun \u001b[38;5;241m=\u001b[39m opt\u001b[38;5;241m.\u001b[39mlast_optimum_value()\n\u001b[1;32m 23\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtype\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mOptimizeResult\u001b[39m\u001b[38;5;124m'\u001b[39m, (), {\n\u001b[1;32m 24\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mx\u001b[39m\u001b[38;5;124m'\u001b[39m: x,\n\u001b[1;32m 25\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfun\u001b[39m\u001b[38;5;124m'\u001b[39m: res_fun,\n\u001b[1;32m 26\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msuccess\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 27\u001b[0m })\n", + "File \u001b[0;32m~/smooth/.venv/lib/python3.8/site-packages/nlopt/nlopt.py:335\u001b[0m, in \u001b[0;36mopt.optimize\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21moptimize\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs):\n\u001b[0;32m--> 335\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_nlopt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mopt_optimize\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[109], line 2\u001b[0m, in \u001b[0;36mobjective_wrapper\u001b[0;34m(x, grad)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mobjective_wrapper\u001b[39m(x, grad):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mCF\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_type_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mcomponents_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mlags_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43madam_created\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mpersistence_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43minitials_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43marima_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mexplanatory_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mphi_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mconstant_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[43mobservations_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43mgeneral\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mprofile_dict\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[99], line 130\u001b[0m, in \u001b[0;36mCF\u001b[0;34m(B, model_type_dict, 
components_dict, lags_dict, matrices_dict, persistence_checked, initials_checked, arima_checked, explanatory_checked, phi_dict, constants_checked, observations_dict, profile_dict, general, bounds, other, otherParameterEstimate, arPolynomialMatrix, maPolynomialMatrix, regressors)\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;241m1e100\u001b[39m \u001b[38;5;241m*\u001b[39m np\u001b[38;5;241m.\u001b[39mmax(eigenValues)\n\u001b[1;32m 129\u001b[0m \u001b[38;5;66;03m# Write down the initials in the recent profile\u001b[39;00m\n\u001b[0;32m--> 130\u001b[0m \u001b[43mprofile_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mprofiles_recent_table\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m[:] \u001b[38;5;241m=\u001b[39m adamElements[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmat_vt\u001b[39m\u001b[38;5;124m'\u001b[39m][:, :lags_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlags_model_max\u001b[39m\u001b[38;5;124m'\u001b[39m]]\n\u001b[1;32m 132\u001b[0m \u001b[38;5;66;03m# Fitter and the losses calculation\u001b[39;00m\n\u001b[1;32m 133\u001b[0m adam_fitted \u001b[38;5;241m=\u001b[39m adam_fitter(adamElements[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmat_vt\u001b[39m\u001b[38;5;124m'\u001b[39m], \n\u001b[1;32m 134\u001b[0m adamElements[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmat_wt\u001b[39m\u001b[38;5;124m'\u001b[39m], \n\u001b[1;32m 135\u001b[0m adamElements[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmat_f\u001b[39m\u001b[38;5;124m'\u001b[39m], \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 149\u001b[0m observations_dict[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mot\u001b[39m\u001b[38;5;124m'\u001b[39m], \n\u001b[1;32m 150\u001b[0m \u001b[38;5;28many\u001b[39m([t \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcomplete\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m t \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbackcasting\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m t \u001b[38;5;129;01min\u001b[39;00m initials_checked[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124minitial_type\u001b[39m\u001b[38;5;124m'\u001b[39m]]))\n", + "\u001b[0;31mKeyError\u001b[0m: 'profiles_recent_table'" + ] + } + ], + "source": [ + "def objective_wrapper(x, grad):\n", + " return CF(x,\n", + " model_type_dict,\n", + " components_dict,\n", + " lags_dict,\n", + " adam_created,\n", + " persistence_results,\n", + " initials_results,\n", + " arima_results,\n", + " explanatory_dict,\n", + " phi_dict,\n", + " constant_dict,\n", + " observations_dict,\n", + " general,\n", + " profile_dict)\n", + "\n", + "# Set objective function\n", + "opt.set_min_objective(objective_wrapper)\n", + "\n", + "\n", + "x = opt.optimize(B)\n", + "res_fun = opt.last_optimum_value()\n", + "res = type('OptimizeResult', (), {\n", + " 'x': x,\n", + " 'fun': res_fun,\n", + " 'success': True\n", + "})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1044.3125265799933" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res.fun" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "general, \n", + " observations_dict,\n", + " persistence_results, \n", + " initials_results, \n", + " arima_results, \n", + " constant_dict, \n", + " model_type_dict, \n", + 
" components_dict, \n", + " lags_dict, \n", + " occurrence_dict, \n", + " phi_dict,\n", + " explanatory_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'distribution': 'default',\n", + " 'loss': 'likelihood',\n", + " 'outliers': 'ignore',\n", + " 'h': 0,\n", + " 'holdout': False,\n", + " 'ic': 'AICc',\n", + " 'bounds': 'usual',\n", + " 'model_do': 'estimate',\n", + " 'fast': False,\n", + " 'models_pool': None,\n", + " 'lambda': 1,\n", + " 'persistence_params': None,\n", + " 'arma_params': None,\n", + " 'distribution_new': 'dnorm',\n", + " 'denominator': None,\n", + " 'y_denominator': None,\n", + " 'multisteps': False}" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 171, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_type_dict['arima_model']" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [], + "source": [ + "(\n", + " model_type_dict,\n", + " components_dict,\n", + " lags_dict,\n", + " observations_dict,\n", + " profile_dict\n", + ") = architector(\n", + " model_type_dict = model_type_dict,\n", + " lags_dict = lags_dict,\n", + " observations_dict = observations_dict,\n", + " arima_checked = arima_results,\n", + " constants_checked = constant_dict,\n", + " explanatory_checked = explanatory_dict,\n", + " #profiles_recent_table = profiles_recent_table,\n", + " #profiles_recent_provided = profiles_recent_provided,\n", + "\n", + " # not used for now\n", + " #xreg_number = params['xreg_number'],\n", + " #xreg_model = params['xreg_model'],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'initial': None,\n", + " 'initial_type': 'optimal',\n", + " 'initial_estimate': True,\n", + " 'initial_level': None,\n", + " 'initial_level_estimate': True,\n", + " 'initial_trend': None,\n", + " 'initial_trend_estimate': True,\n", + " 'initial_seasonal': None,\n", + " 'initial_seasonal_estimate': True,\n", + " 'initial_arima': None,\n", + " 'initial_arima_estimate': True,\n", + " 'initial_arima_number': 0,\n", + " 'initial_xreg_estimate': True,\n", + " 'initial_xreg_provided': False}" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "initials_results" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/filtheo/smooth/.venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3462: FutureWarning: In a future version, DataFrame.mean(axis=None) will return a scalar mean over the entire DataFrame. 
To retain the old behavior, use 'frame.mean(axis=0)' or just 'frame.mean()'\n", + " return mean(axis=axis, dtype=dtype, out=out, **kwargs)\n" + ] + }, + { + "data": { + "text/plain": [ + "dict_keys(['mat_vt', 'mat_wt', 'mat_f', 'vec_g', 'arima_polynomials'])" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adam_created = creator(\n", + " model_type_dict = model_type_dict,\n", + " lags_dict = lags_dict,\n", + " profiles_dict = profile_dict,\n", + " observations_dict = observations_dict,\n", + "\n", + " persistence_checked = persistence_results,\n", + " initials_checked = initials_results,\n", + " arima_checked = arima_results,\n", + " constants_checked = constant_dict,\n", + " phi_dict = phi_dict,\n", + " components_dict = components_dict,\n", + " explanatory_checked = explanatory_dict,\n", + ")\n", + "adam_created.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'B': array([1.000e-01, 1.764e+02]),\n", + " 'Bl': array([ 0., -inf]),\n", + " 'Bu': array([ 1., inf]),\n", + " 'names': ['alpha', 'level']}" + ] + }, + "execution_count": 153, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b_values = initialiser(\n", + " model_type_dict = model_type_dict,\n", + " components_dict = components_dict,\n", + " lags_dict = lags_dict,\n", + " adam_created = adam_created,\n", + " persistence_checked = persistence_results,\n", + " initials_checked = initials_results,\n", + " arima_checked = arima_results,\n", + " constants_checked = constant_dict,\n", + " explanatory_checked = explanatory_dict,\n", + " observations_dict = observations_dict,\n", + " bounds = general['bounds'],\n", + " phi_dict = phi_dict,\n", + ")\n", + "b_values" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'list' object has no attribute 'isin'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[152], line 4\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m b_values[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mB\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m b_values:\n\u001b[0;32m----> 4\u001b[0m b_values[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mB\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m b_values[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mB\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[43mb_values\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnames\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43misin\u001b[49m(b_values[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mB\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mnames)]\n\u001b[1;32m 5\u001b[0m b_values[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mB\u001b[39m\u001b[38;5;124m'\u001b[39m][:] \u001b[38;5;241m=\u001b[39m 
b_values[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mB\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'isin'" + ] + } + ], + "source": [ + "# lines 2647 to 2656 -> skipping them for now\n", + "#if b_values['B'] is not None:\n", + " # if 'names' in b_values:\n", + " # b_values['B'] = b_values['B'][b_values['names'].isin(b_values['B'].names)]\n", + " # b_values['B'][:] = b_values['B']\n", + " # else:\n", + " # b_values['B'][:] = b_values['B']\n", + " # b_values['names'] = b_values['B'].names" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'creator' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[37], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m adam_created \u001b[38;5;241m=\u001b[39m \u001b[43mcreator\u001b[49m(\n\u001b[1;32m 2\u001b[0m ets_model \u001b[38;5;241m=\u001b[39m params[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mets_model\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 3\u001b[0m e_type \u001b[38;5;241m=\u001b[39m params[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124merror_type\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 4\u001b[0m t_type \u001b[38;5;241m=\u001b[39m params[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtrend_type\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 5\u001b[0m s_type \u001b[38;5;241m=\u001b[39m params[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mseason_type\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 6\u001b[0m model_is_trendy \u001b[38;5;241m=\u001b[39m adam_architect[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_is_trendy\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 7\u001b[0m model_is_seasonal \u001b[38;5;241m=\u001b[39m adam_architect[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel_is_seasonal\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 8\u001b[0m lags \u001b[38;5;241m=\u001b[39m params[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlags\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 9\u001b[0m lags_model \u001b[38;5;241m=\u001b[39m adam_architect[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlags_model\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 10\u001b[0m lags_model_arima \u001b[38;5;241m=\u001b[39m params[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlags_model_arima\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 11\u001b[0m lags_model_all \u001b[38;5;241m=\u001b[39m adam_architect[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlags_model_all\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 12\u001b[0m lags_model_max \u001b[38;5;241m=\u001b[39m adam_architect[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlags_model_max\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 13\u001b[0m \n\u001b[1;32m 14\u001b[0m \n\u001b[1;32m 15\u001b[0m \n\u001b[1;32m 16\u001b[0m )\n", + "\u001b[0;31mNameError\u001b[0m: name 'creator' is not defined" + ] + } + ], + "source": [ + "adam_created = creator(\n", + " ets_model = params['ets_model'],\n", + " e_type = params['error_type'],\n", + " t_type = params['trend_type'],\n", + " s_type = params['season_type'],\n", + " model_is_trendy = adam_architect['model_is_trendy'],\n", + " model_is_seasonal = adam_architect['model_is_seasonal'],\n", + " lags 
= params['lags'],\n", + " lags_model = adam_architect['lags_model'],\n", + " lags_model_arima = params['lags_model_arima'],\n", + " lags_model_all = adam_architect['lags_model_all'],\n", + " lags_model_max = adam_architect['lags_model_max'],\n", + "\n", + "\n", + "\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (953245190.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Cell \u001b[0;32mIn[23], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m arch_results =\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + ] + } + ], + "source": [ + "arch_results = " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From f50e8ff50ab18be388904131f00629f52d7af92a Mon Sep 17 00:00:00 2001 From: Filotas Theodosiou <70523417+FilTheo@users.noreply.github.com> Date: Fri, 24 Jan 2025 17:49:38 +0200 Subject: [PATCH 3/3] updated core functions --- python/smooth/adam_general/core/adam.py | 715 +++++-- python/smooth/adam_general/core/checker.py | 1753 +++++++++++++++++ python/smooth/adam_general/core/creator.py | 1100 +++++------ python/smooth/adam_general/core/estimator.py | 1402 ++++++------- .../adam_general/core/utils/cost_functions.py | 534 ++--- python/smooth/adam_general/core/utils/dump.py | 71 + python/smooth/adam_general/core/utils/ic.py | 13 +- .../utils/ic.py\357\200\272Zone.Identifier" | 2 + .../adam_general/core/utils/likelihood.py | 0 .../adam_general/core/utils/polynomials.py | 8 + 10 files changed, 3928 insertions(+), 1670 deletions(-) create mode 100644 python/smooth/adam_general/core/checker.py create mode 100644 python/smooth/adam_general/core/utils/dump.py create mode 100644 "python/smooth/adam_general/core/utils/ic.py\357\200\272Zone.Identifier" create mode 100644 python/smooth/adam_general/core/utils/likelihood.py create mode 100644 python/smooth/adam_general/core/utils/polynomials.py diff --git a/python/smooth/adam_general/core/adam.py b/python/smooth/adam_general/core/adam.py index 165e1c59..ab7ec4b6 100644 --- a/python/smooth/adam_general/core/adam.py +++ b/python/smooth/adam_general/core/adam.py @@ -1,193 +1,584 @@ import time import warnings -from adam_profile import parameters_checker - -class Adam: - def __init__(self, model="ZXZ", lags=None, orders=None, constant=False, formula=None, - regressors=["use", "select", "adapt"], - occurrence=["none", "auto", "fixed", "general", "odds-ratio", "inverse-odds-ratio", "direct"], - distribution=["default", "dnorm", "dlaplace", "ds", "dgnorm", "dlnorm", "dinvgauss", "dgamma"], - loss=["likelihood", "MSE", "MAE", "HAM", "LASSO", "RIDGE", "MSEh", "TMSE", "GTMSE", "MSCE"], - outliers=["ignore", "use", "select"], level=0.99, - h=0, holdout=False, - persistence=None, phi=None, initial=["optimal", "backcasting", "complete"], arma=None, - ic=["AICc", "AIC", "BIC", "BICc"], bounds=["usual", "admissible", "none"], - silent=True, **kwargs): +from core.checker import parameters_checker +from core.estimator import estimator, selector, preparator +from core.creator import creator, initialiser, architector, 
filler
+from core.utils.ic import ic_function
+import numpy as np
+import pandas as pd
+
+from smooth.adam_general._adam_general import adam_fitter, adam_forecaster
+
+class Adam(object):
+    # Note: it is not yet clear which other constructor parameters to expose here;
+    # to be discussed.
+    def __init__(self, model, lags,
+
+                 profiles_recent_provided = False,
+                 profiles_recent_table = None,
+                 orders = None,
+                 constant = False,
+                 outliers = "ignore",
+                 level = 0.99,
+                 persistence = None,
+                 phi = None,
+                 initial = None,
+                 distribution = "default",
+                 loss = "likelihood",
+
+                 occurrence = "none",
+                 ic = "AICc",
+                 bounds = "usual",
+                 silent = False,
+                 multisteps = None,
+                 lb = None,
+                 ub = None,
+                 print_level = 1,
+                 max_eval = None):
+
+        # Start measuring the time of calculations
+        self.start_time = time.time()
+
         self.model = model
         self.lags = lags
+        self.profiles_recent_provided = profiles_recent_provided
+        self.profiles_recent_table = profiles_recent_table
         self.orders = orders
         self.constant = constant
-        self.formula = formula
-        self.regressors = regressors
-        self.occurrence = occurrence
-        self.distribution = distribution
-        self.loss = loss
         self.outliers = outliers
         self.level = level
-        self.h = h
-        self.holdout = holdout
         self.persistence = persistence
         self.phi = phi
         self.initial = initial
-        self.arma = arma
+        self.distribution = distribution
+        self.loss = loss
+        self.occurrence = occurrence
         self.ic = ic
         self.bounds = bounds
         self.silent = silent
-        self.kwargs = kwargs
-
-        self.profiles_recent_provided = False
-        self.profiles_recent_table = None
-        self.initial_estimated = None
-        self.B = None
-        self.loss_value = None
-        self.other = {}
-
-        self.elipsis = dict()
+        self.multisteps = multisteps
+        self.lb = lb
+        self.ub = ub
+        self.print_level = print_level
+        self.max_eval = max_eval
+        # Open question: what else should be set up in __init__?

-        # If a previous model is provided as a model, write down the variables
-        if isinstance(self.model, (Adam, AdamSimulation)):
-            self._handle_previous_model()
+    def fit(self, ts, h = None,
+            holdout = False,
+            model_do = "estimate",
+            fast = False,
+            models_pool = None,
+            lambda_param = None,
+            frequency = None):

-        elif isinstance(self.model, ETS):
-            self._init_from_ets()
+        self.h = h
+        self.holdout = holdout
+        self.model_do = model_do
+        self.fast = fast
+        self.models_pool = models_pool
+        self.lambda_param = lambda_param
+        self.frequency = frequency
+
+        # First, check all of the provided parameters.
+        # TODO: make this unpacking prettier.
+        (self.general,
+         self.observations_dict,
+         self.persistence_results,
+         self.initials_results,
+         self.arima_results,
+         self.constant_dict,
+         self.model_type_dict,
+         self.components_dict,
+         self.lags_dict,
+         self.occurrence_dict,
+         self.phi_dict,
+         self.explanatory_dict,
+         self.params_info) = parameters_checker(ts, model=self.model,
+                                                lags=self.lags,
+                                                orders=self.orders,
+                                                constant=self.constant,
+                                                outliers=self.outliers,
+                                                level=self.level,
+                                                persistence=self.persistence,
+                                                phi=self.phi,
+                                                initial=self.initial,
+                                                distribution=self.distribution,
+                                                loss=self.loss,
+                                                h=self.h,
+                                                holdout=self.holdout,
+                                                occurrence=self.occurrence,
+                                                ic=self.ic,
+                                                bounds=self.bounds,
+                                                silent=self.silent,
+                                                model_do=self.model_do,
+                                                fast=self.fast,
+                                                models_pool=self.models_pool,
+                                                lambda_param=self.lambda_param,
+                                                frequency=self.frequency)
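+
+        # A minimal sketch of the intended workflow around this call,
+        # mirroring the accompanying test notebook (hypothetical data):
+        #
+        #     model = Adam(model="ANN", lags=[12])
+        #     model.fit(ts_df, h=12)
+        #     forecasts = model.predict()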
-        elif isinstance(self.model, str):
-            pass # Everything is okay
-        else:
-            warnings.warn("A model of an unknown class was provided. Switching to 'ZZZ'.", UserWarning)
-            self.model = "ZZZ"
-
-        # Check the parameters of the function and create variables based on them
-        checker_return = parameters_checker(
-            data=data, model=self.model, lags=self.lags, formula=self.formula,
-            orders=self.orders, constant=self.constant, arma=self.arma,
-            outliers=self.outliers, level=self.level,
-            persistence=self.persistence, phi=self.phi, initial=self.initial,
-            distribution=self.distribution, loss=self.loss, h=self.h,
-            holdout=self.holdout, occurrence=self.occurrence, ic=self.ic,
-            bounds=self.bounds, regressors=self.regressors, y_name=y_name,
-            silent=self.silent, model_do="",
-            parent_environment=locals(), ellipsis=ellipsis, fast=False
-        )
-
-
-        #### If select was provided in the model, do auto.adam selection ####
-        # here I need to implement the auto.adam selection.
-        # Will do it in the future
-        # lines 690 - 700 in adam.R
+        # line 534 -> use regression here
+        # line 677 -> do the adam selection here

-    def _init_from_ets(self):
-        self.components = self.model.components # when I do the ETS
-        coeffs = self.model.coef()
-        self.persistence = coeffs['persistence']
-        self.phi = coeffs.get('phi')
-        self.initial = coeffs['initial']
-        self.lags = [1]
-
-        if self.components[1] != "N":
-            self.lags.append(1)
-        if self.components[2] != "N":
-            self.lags.append(self.model.m)
-
-        self.model = self.model.model_type()
-        self.distribution = "dnorm"
-        self.loss = "likelihood"
-
-    def coef(self):
-        if isinstance(self.model, ETS):
-            coefficients = {
-                'persistence': self.persistence,
-                'initial': self.initial
-            }
-            if self.phi is not None:
-                coefficients['phi'] = self.phi
-
-            # Add level, trend, and seasonal components if present
-            if self.components[0] != "N":
-                coefficients['level'] = self.initial[0]
-            if self.components[1] != "N":
-                coefficients['trend'] = self.initial[1]
-            if self.components[2] != "N":
-                seasonal_index = 2 if self.components[1] != "N" else 1
-                coefficients['seasonal'] = self.initial[seasonal_index:]
+        # then lines 4033 to 4070 deal with the occurrence model
+        # this will also wait for a bit
+
+        # then I also skip the regression data on line 4036
+        # I also skip the number of parameters on line 4070
+
+        # from line 4099 we continue:
+        if self.model_type_dict["model_do"] == "estimate":
+            # If this is LASSO/RIDGE with lambda=1, use MSE to estimate initials only
+            lambda_original = self.general['lambda']
+            if self.general['loss'] in ["LASSO", "RIDGE"] and self.general['lambda'] == 1:
+                if self.model_type_dict["ets_model"]:
+                    # Pre-set ETS parameters
+                    self.persistence_results["persistence_estimate"] = False
+                    self.persistence_results["persistence_level_estimate"] = False
+                    self.persistence_results["persistence_trend_estimate"] = False
+                    self.persistence_results["persistence_seasonal_estimate"] = [False]
+                    self.persistence_results["persistence_level"] = 0
+                    self.persistence_results["persistence_trend"] = 0
+                    self.persistence_results["persistence_seasonal"] = [0]
+                    # Phi
+                    self.phi_dict["phi_estimate"] = False
+                    self.phi_dict["phi"] = 1
+
+                if self.model_type_dict["xreg_model"]:
+                    # ETSX parameters
+                    self.persistence_results["persistence_xreg_estimate"] = False
+                    self.persistence_results["persistence_xreg"] = 0
+
+                if self.model_type_dict["arima_model"]:
+                    # Pre-set ARMA parameters
+                    self.arima_results["ar_estimate"] = [False]
+                    self.arima_results["ma_estimate"] = [False]
+                    arma_parameters = []
+                    j = 0
+                    for i, lag in enumerate(self.lags_dict["lags"]):
+                        if self.arima_results["ar_orders"][i] > 0:
+                            arma_parameters.extend([1] * self.arima_results["ar_orders"][i])
+                            j += self.arima_results["ar_orders"][i]
+                        if self.arima_results["ma_orders"][i] > 0:
+                            arma_parameters.extend([0] * self.arima_results["ma_orders"][i])
+                            j += self.arima_results["ma_orders"][i]
+                    self.arima_results["arma_parameters"] = arma_parameters
+
+                self.general['lambda'] = 0
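+
+                # With the penalty weight zeroed out, the loss reduces to its
+                # pure fit component, so the estimation below effectively only
+                # has to find the initial states; the original value is kept
+                # in lambda_original above.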
+
+            # Estimate the model
+            self.adam_estimated = estimator(
+                general_dict=self.general,
+                model_type_dict=self.model_type_dict,
+                lags_dict=self.lags_dict,
+                observations_dict=self.observations_dict,
+                arima_dict=self.arima_results,
+                constant_dict=self.constant_dict,
+                explanatory_dict=self.explanatory_dict,
+                profiles_recent_table=self.profiles_recent_table,
+                profiles_recent_provided=self.profiles_recent_provided,
+                persistence_dict=self.persistence_results,
+                initials_dict=self.initials_results,
+                occurrence_dict=self.occurrence_dict,
+                phi_dict=self.phi_dict,
+                components_dict=self.components_dict,
+            )
+
+            # Build the architector
+            (self.model_type_dict,
+             self.components_dict,
+             self.lags_dict,
+             self.observations_dict,
+             self.profile_dict) = architector(
+                model_type_dict=self.model_type_dict,
+                lags_dict=self.lags_dict,
+                observations_dict=self.observations_dict,
+                arima_checked=self.arima_results,
+                constants_checked=self.constant_dict,
+                explanatory_checked=self.explanatory_dict,
+                profiles_recent_table=self.profiles_recent_table,
+                profiles_recent_provided=self.profiles_recent_provided
+            )

-            return coefficients
-        return {}
+            # Build the creator
+            self.adam_created = creator(
+                model_type_dict=self.model_type_dict,
+                lags_dict=self.lags_dict,
+                profiles_dict=self.profile_dict,
+                observations_dict=self.observations_dict,
+                persistence_checked=self.persistence_results,
+                initials_checked=self.initials_results,
+                arima_checked=self.arima_results,
+                constants_checked=self.constant_dict,
+                phi_dict=self.phi_dict,
+                components_dict=self.components_dict,
+                explanatory_checked=self.explanatory_dict
+            )

-    def fit(self, data):
-        # Start measuring the time of calculations
-        start_time = time.time()
+            # Calculate IC
-        # Get the call information
-        ellipsis = self.kwargs
+            self.ic_selection = ic_function(self.general['ic'],
-        # Is it useful? 
- y_name = str(data) + # Initialize parameters_number in general if not already present + if 'parameters_number' not in self.general: + self.general['parameters_number'] = self.params_info['parameters_number'] + self.general['parameters_number'][0][0] = self.n_param_estimated + # Handle likelihood loss case + if self.general['loss'] == 'likelihood': + if len(self.general['parameters_number'][0]) <= 3: + self.general['parameters_number'][0].append(1) + else: + self.general['parameters_number'][0][3] = 1 + + # Calculate row sums + if len(self.general['parameters_number'][0]) <= 4: + self.general['parameters_number'][0].append(sum(self.general['parameters_number'][0][0:4])) + self.general['parameters_number'][1].append(sum(self.general['parameters_number'][1][0:4])) + else: + self.general['parameters_number'][0][4] = sum(self.general['parameters_number'][0][0:4]) + self.general['parameters_number'][1][4] = sum(self.general['parameters_number'][1][0:4]) + + elif self.model_type_dict["model_do"] == "select": + # Run model selection + self.adam_selected = selector( + model_type_dict=self.model_type_dict, + phi_dict=self.phi_dict, + general=self.general, + lags_dict=self.lags_dict, + observations_dict=self.observations_dict, + arima_results=self.arima_results, + constant_dict=self.constant_dict, + explanatory_dict=self.explanatory_dict, + occurrence_dict=self.occurrence_dict, + components_dict=self.components_dict, + profiles_recent_table=self.profiles_recent_table, + profiles_recent_provided=self.profiles_recent_provided, + persistence_results=self.persistence_results, + initials_results=self.initials_results, + criterion=self.ic, + silent=self.silent + ) + + # Get selection results + self.ic_selection = self.adam_selected['ic_selection'] + self.results = self.adam_selected['results'] + + # Find best model + self.best_model = min(self.ic_selection.items(), key=lambda x: x[1])[0] + self.best_id = next(i for i, result in enumerate(self.results) + if result['model'] == self.best_model) + + # Update dictionaries with best model results + self.model_type_dict = self.results[self.best_id]['model_type_dict'] + self.phi_dict = self.results[self.best_id]['phi_dict'] + self.adam_estimated = self.results[self.best_id]['adam_estimated'] + + # Build the architector + (self.model_type_dict, + self.components_dict, + self.lags_dict, + self.observations_dict, + self.profile_dict) = architector( + model_type_dict=self.model_type_dict, + lags_dict=self.lags_dict, + observations_dict=self.observations_dict, + arima_checked=self.arima_results, + constants_checked=self.constant_dict, + explanatory_checked=self.explanatory_dict, + profiles_recent_table=self.profiles_recent_table, + profiles_recent_provided=self.profiles_recent_provided + ) + + # Build the creator + self.adam_created = creator( + model_type_dict=self.model_type_dict, + lags_dict=self.lags_dict, + profiles_dict=self.profile_dict, + observations_dict=self.observations_dict, + persistence_checked=self.persistence_results, + initials_checked=self.initials_results, + arima_checked=self.arima_results, + constants_checked=self.constant_dict, + phi_dict=self.phi_dict, + components_dict=self.components_dict, + explanatory_checked=self.explanatory_dict + ) + + # Update parameters number + self.n_param_estimated = self.adam_estimated['n_param_estimated'] + self.general['parameters_number'] = self.params_info['parameters_number'] + self.general['parameters_number'][0][0] = self.n_param_estimated + + # Handle likelihood loss case + if self.general['loss'] == 
'likelihood': + if len(self.general['parameters_number'][0]) <= 3: + self.general['parameters_number'][0].append(1) + else: + self.general['parameters_number'][0][3] = 1 + + # Calculate row sums + if len(self.general['parameters_number'][0]) <= 4: + self.general['parameters_number'][0].append(sum(self.general['parameters_number'][0][0:4])) + self.general['parameters_number'][1].append(sum(self.general['parameters_number'][1][0:4])) + else: + self.general['parameters_number'][0][4] = sum(self.general['parameters_number'][0][0:4]) + self.general['parameters_number'][1][4] = sum(self.general['parameters_number'][1][0:4]) + + elif self.model_type_dict["model_do"] == "combine": + # Store original model configuration + model_original = self.model_type_dict.copy() + + # If models pool is not provided, create one + if self.general['models_pool'] is None: + # Define the whole pool of errors + if not self.model_type_dict['allow_multiplicative']: + pool_errors = ['A'] + pool_trends = ['N', 'A', 'Ad'] + pool_seasonals = ['N', 'A'] + else: + pool_errors = ['A', 'M'] + pool_trends = ['N', 'A', 'Ad', 'M', 'Md'] + pool_seasonals = ['N', 'A', 'M'] + + # If error_type is not Z, check on additive errors + if self.model_type_dict['error_type'] != 'Z': + if self.model_type_dict['error_type'] == 'N': + pool_errors = ['N'] + elif self.model_type_dict['error_type'] in ['A', 'X']: + pool_errors = ['A'] + elif self.model_type_dict['error_type'] in ['M', 'Y']: + pool_errors = ['M'] + + # If trend_type is not Z, create pool with specified type + if self.model_type_dict['trend_type'] != 'Z': + if self.model_type_dict['trend_type'] == 'N': + pool_trends = ['N'] + elif self.model_type_dict['trend_type'] == 'A': + pool_trends = ['Ad' if self.model_type_dict['damped'] else 'A'] + elif self.model_type_dict['trend_type'] == 'M': + pool_trends = ['Md' if self.model_type_dict['damped'] else 'M'] + elif self.model_type_dict['trend_type'] == 'X': + pool_trends = ['N', 'A', 'Ad'] + elif self.model_type_dict['trend_type'] == 'Y': + pool_trends = ['N', 'M', 'Md'] + + # If season_type is not Z, create specific pools + if self.model_type_dict['season_type'] != 'Z': + if self.model_type_dict['season_type'] == 'N': + pool_seasonals = ['N'] + elif self.model_type_dict['season_type'] == 'A': + pool_seasonals = ['A'] + elif self.model_type_dict['season_type'] == 'X': + pool_seasonals = ['N', 'A'] + elif self.model_type_dict['season_type'] == 'M': + pool_seasonals = ['M'] + elif self.model_type_dict['season_type'] == 'Y': + pool_seasonals = ['N', 'M'] + + # Create models pool by combining all possibilities + self.general['models_pool'] = [e + t + s for e in pool_errors + for t in pool_trends + for s in pool_seasonals] + + # Run model selection + self.adam_selected = selector( + model_type_dict=self.model_type_dict, + phi_dict=self.phi_dict, + general=self.general, + lags_dict=self.lags_dict, + observations_dict=self.observations_dict, + arima_results=self.arima_results, + constant_dict=self.constant_dict, + explanatory_dict=self.explanatory_dict, + occurrence_dict=self.occurrence_dict, + components_dict=self.components_dict, + profiles_recent_table=self.profiles_recent_table, + profiles_recent_provided=self.profiles_recent_provided, + persistence_results=self.persistence_results, + initials_results=self.initials_results, + criterion="AICc", + silent=False + ) + + # Calculate weights based on IC + ic_best = min(self.adam_selected['ic_selection'].values()) + ic_weights = {model: np.exp(-0.5 * (ic - ic_best)) + for model, ic in 
self.adam_selected['ic_selection'].items()} + weights_sum = sum(ic_weights.values()) + ic_weights = {model: weight/weights_sum for model, weight in ic_weights.items()} + + # Set very small weights to 0 as a failsafe + ic_weights = {model: 0 if weight < 1e-5 else weight + for model, weight in ic_weights.items()} + weights_sum = sum(ic_weights.values()) + ic_weights = {model: weight/weights_sum for model, weight in ic_weights.items()} + + # Store weights in selected results + self.adam_selected['ic_weights'] = ic_weights + + # Process each selected model + for i in range(len(self.adam_selected['results'])): + result = self.adam_selected['results'][i] + + # Build architector for this model + (self.model_type_dict, + self.components_dict, + self.lags_dict, + self.observations_dict, + self.profile_dict) = architector( + model_type_dict=result['model_type_dict'], + lags_dict=self.lags_dict, + observations_dict=self.observations_dict, + arima_checked=self.arima_results, + constants_checked=self.constant_dict, + explanatory_checked=self.explanatory_dict, + profiles_recent_table=self.profiles_recent_table, + profiles_recent_provided=self.profiles_recent_provided + ) + + # Store updated dictionaries in results + self.adam_selected['results'][i].update({ + 'model_type_dict': self.model_type_dict, + 'components_dict': self.components_dict, + 'lags_dict': self.lags_dict, + 'observations_dict': self.observations_dict, + 'profile_dict': self.profile_dict + }) + + # Create matrices for this model + self.adam_created = creator( + model_type_dict=self.model_type_dict, + lags_dict=self.lags_dict, + profiles_dict=self.profile_dict, + observations_dict=self.observations_dict, + persistence_checked=self.persistence_results, + initials_checked=self.initials_results, + arima_checked=self.arima_results, + constants_checked=self.constant_dict, + phi_dict=self.phi_dict, + components_dict=self.components_dict, + explanatory_checked=self.explanatory_dict + ) + + # Store created matrices + self.adam_selected['results'][i]['adam_created'] = self.adam_created + + # Update parameters number + n_param_estimated = result['adam_estimated']['n_param_estimated'] + self.general['parameters_number'] = self.params_info['parameters_number'] + self.general['parameters_number'][0][0] = n_param_estimated + + # Handle likelihood loss case + if self.general['loss'] == 'likelihood': + if len(self.general['parameters_number'][0]) <= 3: + self.general['parameters_number'][0].append(1) + else: + self.general['parameters_number'][0][3] = 1 + + # Calculate row sums + if len(self.general['parameters_number'][0]) <= 4: + self.general['parameters_number'][0].append(sum(self.general['parameters_number'][0][0:4])) + self.general['parameters_number'][1].append(sum(self.general['parameters_number'][1][0:4])) + else: + self.general['parameters_number'][0][4] = sum(self.general['parameters_number'][0][0:4]) + self.general['parameters_number'][1][4] = sum(self.general['parameters_number'][1][0:4]) + + # Store parameters number + self.adam_selected['results'][i]['parameters_number'] = self.general['parameters_number'] - def _handle_previous_model(self): - # If this is the simulated data, extract the parameters - # TODO: Handle simulated data case if needed - - self.initial = self.model.initial - self.initial_estimated = self.model.initial_estimated - self.distribution = self.model.distribution - self.loss = self.model.loss - self.persistence = self.model.persistence - self.phi = self.model.phi - - if self.model.initial_type != "complete": - 
self.initial = self.model.initial else: - self.initial = "b" - - self.occurrence = self.model.occurrence - self.ic = self.model.ic - self.bounds = self.model.bounds - - # lambda for LASSO - self.ellipsis['lambda'] = self.model.other.get('lambda') - - # parameters for distributions - self.ellipsis['alpha'] = self.model.other.get('alpha') - self.ellipsis['shape'] = self.model.other.get('shape') - self.ellipsis['nu'] = self.model.other.get('nu') - self.B = self.model.B - - self.loss_value = self.model.loss_value - self.log_lik_adam_value = self.model.loglik() - self.lags_model_all = self.model.model_lags() - - # This needs to be fixed to align properly in case of various seasonals - self.profiles_recent_table = self.model.profile_initial - self.profiles_recent_provided = True - - self.regressors = self.model.regressors - - if self.formula is None: - self.formula = self.model.formula() - - # Parameters of the original ARIMA model - self.lags = self.model.lags() - self.orders = self.model.orders() - self.constant = self.model.constant - - if self.constant is None: - self.constant = False - - self.arma = self.model.arma + warnings.warn(f"Unknown model_do value: {self.model_type_dict['model_do']}. Expected one of: 'estimate', 'select', 'combine'") + + # Transform data into appropriate classes + if isinstance(self.observations_dict['y_in_sample'], np.ndarray): + self.y_in_sample = pd.Series( + self.observations_dict['y_in_sample'], + index=pd.date_range( + start=self.observations_dict['y_start'], + periods=len(self.observations_dict['y_in_sample']), + freq=self.observations_dict['frequency'] + ) + ) + if self.general['holdout']: + self.y_holdout = pd.Series( + self.observations_dict['y_holdout'], + index=pd.date_range( + start=self.observations_dict['y_forecast_start'], + periods=len(self.observations_dict['y_holdout']), + freq=self.observations_dict['frequency'] + ) + ) + else: + self.y_in_sample = self.observations_dict['y_in_sample'].copy() + if self.general['holdout']: + self.y_holdout = pd.Series( + self.observations_dict['y_holdout'], + index=self.observations_dict['y_forecast_index'] + ) + + # Handle distribution selection + if self.general['distribution'] == "default": + if self.general['loss'] == "likelihood": + if self.model_type_dict['error_type'] == "A": + self.general['distribution_new'] = "dnorm" + elif self.model_type_dict['error_type'] == "M": + self.general['distribution_new'] = "dgamma" + elif self.general['loss'] in ["MAEh", "MACE", "MAE"]: + self.general['distribution_new'] = "dlaplace" + elif self.general['loss'] in ["HAMh", "CHAM", "HAM"]: + self.general['distribution_new'] = "ds" + elif self.general['loss'] in ["MSEh", "MSCE", "MSE", "GPL"]: + self.general['distribution_new'] = "dnorm" + else: + self.general['distribution_new'] = self.general['distribution'] + + + return self + + + def predict(self): + """Make predictions using the fitted model""" - self.model = self.model.model_type() - model_do = "use" + out = preparator( + # Model info + model_type_dict=self.model_type_dict, + + # Components info + components_dict=self.components_dict, + + # Lags info + lags_dict=self.lags_dict, + + # Matrices from creator + matrices_dict=self.adam_created, + + # Parameter dictionaries + persistence_checked=self.persistence_results, + initials_checked=self.initials_results, + arima_checked=self.arima_results, + explanatory_checked=self.explanatory_dict, + phi_dict=self.phi_dict, + constants_checked=self.constant_dict, + + # Other parameters + observations_dict=self.observations_dict, + 
occurrence_dict=self.occurrence_dict,
+            general_dict=self.general,
+            profiles_dict=self.profile_dict,
+
+            # The parameter vector
+            adam_estimated=self.adam_estimated,
+
+            # Optional parameters
+            bounds="usual",
+            other=None
+        )
-
+        return out
\ No newline at end of file
diff --git a/python/smooth/adam_general/core/checker.py b/python/smooth/adam_general/core/checker.py
new file mode 100644
index 00000000..0eb6ded0
--- /dev/null
+++ b/python/smooth/adam_general/core/checker.py
@@ -0,0 +1,1753 @@
+import numpy as np
+import pandas as pd
+
+def _warn(msg, silent=False):
+    """Helper to show warnings in a style closer to R."""
+    if not silent:
+        print(f"Warning: {msg}")
+
+def _check_occurrence(data, occurrence, frequency = None, silent=False):
+    """
+    Check / handle 'occurrence' parameter similarly to R code.
+    Return a dict with occurrence details and nonzero counts.
+    """
+    # NOTE: frequency is currently not used in this check.
+    data_list = list(data) if not isinstance(data, list) else data
+    obs_in_sample = len(data_list)
+    # Identify non-zero observations
+    nonzero_indices = [i for i, val in enumerate(data_list) if val is not None and val != 0]
+    obs_nonzero = len(nonzero_indices)
+    # If all zeroes, fall back to no occurrence model
+    if all(val == 0 for val in data_list):
+        _warn("You have a sample with zeroes only. Your forecast will be zero.", silent)
+        return {
+            "occurrence": "none",
+            "occurrence_model": False,
+            "obs_in_sample": obs_in_sample,
+            "obs_nonzero": 0
+        }
+
+    # Validate the occurrence choice
+    valid_occ = ["none","auto","fixed","general","odds-ratio",
+                 "inverse-odds-ratio","direct","provided"]
+    if occurrence not in valid_occ:
+        _warn(f"Invalid occurrence: {occurrence}. Switching to 'none'.", silent)
+        occurrence = "none"
+
+    occurrence_model = (occurrence not in ["none","provided"])
+    return {
+        "occurrence": occurrence,
+        "occurrence_model": occurrence_model,
+        "obs_in_sample": obs_in_sample,
+        "obs_nonzero": obs_nonzero
+    }
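+
+# A quick illustration of the checker above (hypothetical values, not an
+# enforced doctest):
+#
+#     _check_occurrence([1, 0, 2, 3], "none")
+#     # -> {"occurrence": "none", "occurrence_model": False,
+#     #     "obs_in_sample": 4, "obs_nonzero": 3}
+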
+def _check_lags(lags, obs_in_sample, silent=False):
+    """
+    Validate or tweak the set of lags. Force at least lag=1, remove zeros if any.
+    Warn if largest lag >= obs_in_sample.
+    Return dictionary with lags info including seasonal lags.
+    """
+    # Remove any zero-lags
+    lags = [lg for lg in lags if lg != 0]
+    # Force 1 in lags (for level)
+    if 1 not in lags:
+        lags.insert(0, 1)
+    # Must be positive
+    if any(lg <= 0 for lg in lags):
+        raise ValueError("Right! Why don't you try complex lags then, mister smart guy? (Lag <= 0 given)")
+
+    # Create lagsModel (matrix in R, list here)
+    lags_model = sorted(set(lags))
+
+    # Get seasonal lags (all lags > 1)
+    lags_model_seasonal = [lag for lag in lags_model if lag > 1]
+
+    max_lag = max(lags) if lags else 1
+    if max_lag >= obs_in_sample:
+        _warn(f"The maximum lags value is {max_lag}, while sample size is {obs_in_sample}. I cannot guarantee that I'll be able to fit the model.", silent)
+
+    return {
+        "lags": sorted(set(lags)),
+        "lags_model": lags_model,
+        "lags_model_seasonal": lags_model_seasonal,
+        "lags_length": len(lags_model),
+        "max_lag": max_lag
+    }
+
+def _expand_component_code(comp_char, allow_multiplicative=True):
+    """
+    Expand a single component character into a list of valid possibilities,
+    following the R approach more fully:
+    - 'Z' => ['A','M','N'] (if multiplicative allowed) or ['A','N'] otherwise
+    - 'F' => "full" => ['A','M','N'] if multiplicative allowed
+    - 'P' => "pure" => ['A','M'] if multiplicative allowed, else ['A']
+    - 'C' => "combine all" => we treat like 'Z' expansions but mark combine
+    - 'X','Y' => specialized expansions to A or M
+    - 'N','A','M' => remain as-is
+    """
+    possible = set()
+    # Handle each special case:
+    if comp_char == 'Z':
+        # Expand to A,N + M if allowed
+        possible.update(['A','N'])
+        if allow_multiplicative:
+            possible.add('M')
+    elif comp_char == 'C':
+        # "C" is effectively "combine all" in R. We'll expand the same way as 'Z'
+        # but in _build_models_pool_from_components we will detect that we've used 'C'
+        # and set combined_mode = True.
+        possible.update(['A','N'])
+        if allow_multiplicative:
+            possible.add('M')
+    elif comp_char == 'F':
+        # "full" => A, N, plus M if multiplicative
+        possible.update(['A','N'])
+        if allow_multiplicative:
+            possible.add('M')
+    elif comp_char == 'P':
+        # "pure" => A plus M if multiplicative
+        possible.update(['A'])
+        if allow_multiplicative:
+            possible.add('M')
+    elif comp_char == 'X':
+        # R logic converts X->A
+        possible.update(['A'])
+    elif comp_char == 'Y':
+        # R logic converts Y->M if allowed, else A
+        if allow_multiplicative:
+            possible.update(['M'])
+        else:
+            possible.update(['A'])
+    else:
+        # If it's one of 'A','M','N' or an unknown letter, just return that
+        possible.add(comp_char)
+    return list(possible)
+
+def _build_models_pool_from_components(error_type, trend_type, season_type, damped, allow_multiplicative):
+    """
+    Build a models pool by fully enumerating expansions for E,T,S if any of them
+    are Z, F, P, or C. If 'C' appears, we also set combined_mode = True.
+    This more closely replicates the R approach of enumerating candidate models.
+    """
+    err_options = _expand_component_code(error_type, allow_multiplicative)
+    trend_options = _expand_component_code(trend_type, allow_multiplicative)
+    seas_options = _expand_component_code(season_type, allow_multiplicative)
+
+    # Check if 'C' is in any expansions => combined_mode
+    combined_mode = ('C' in error_type or 'C' in trend_type or 'C' in season_type)
+
+    # Build candidate models
+    candidate_models = []
+    for e in err_options:
+        for t in trend_options:
+            for s in seas_options:
+                # Add 'd' if damped
+                if damped and t not in ['N']:
+                    candidate_models.append(f"{e}{t}d{s}")
+                else:
+                    candidate_models.append(f"{e}{t}{s}")
+
+    candidate_models = list(set(candidate_models)) # unique
+    candidate_models.sort()
+    return candidate_models, combined_mode
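+
+# A small illustration of the enumeration above (hypothetical call, not an
+# enforced doctest): the trend code 'Z' expands to A/N/M, while fixed codes
+# stay as given, so
+#
+#     _build_models_pool_from_components("A", "Z", "N",
+#                                        damped=False, allow_multiplicative=True)
+#     # -> (['AAN', 'AMN', 'ANN'], False)
+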
representing model components + allow_multiplicative: Whether multiplicative models are allowed + silent: Whether to suppress warnings + + Returns: + Dictionary with model components and configuration + """ + # Initialize defaults + error_type = trend_type = season_type = "N" + damped = False + model_do = "estimate" + models_pool = None + + # Validate model string + if not isinstance(model_str, str): + if not silent: + _warn(f"Invalid model type: {model_str}. Should be a string. Switching to 'ZZZ'.") + model_str = "ZZZ" + + # Handle 4-character models (with damping) + if len(model_str) == 4: + error_type = model_str[0] + trend_type = model_str[1] + season_type = model_str[3] + if model_str[2] != 'd': + if not silent: + _warn(f"Invalid damped trend specification in {model_str}. Using 'd'.") + damped = True + + # Handle 3-character models + elif len(model_str) == 3: + error_type = model_str[0] + trend_type = model_str[1] + season_type = model_str[2] + damped = trend_type in ["Z", "X", "Y"] + + else: + if not silent: + _warn(f"Invalid model string length: {model_str}. Switching to 'ZZZ'.") + model_str = "ZZZ" + error_type = trend_type = season_type = "Z" + damped = True + + # Validate components + valid_error = ["Z", "X", "Y", "A", "M", "C", "N"] + valid_trend = ["Z", "X", "Y", "N", "A", "M", "C"] + valid_season = ["Z", "X", "Y", "N", "A", "M", "C"] + + if error_type not in valid_error: + if not silent: + _warn(f"Invalid error type: {error_type}. Switching to 'Z'.") + error_type = "Z" + model_do = "select" + + if trend_type not in valid_trend: + if not silent: + _warn(f"Invalid trend type: {trend_type}. Switching to 'Z'.") + trend_type = "Z" + model_do = "select" + + if season_type not in valid_season: + if not silent: + _warn(f"Invalid seasonal type: {season_type}. 
Switching to 'Z'.") + season_type = "Z" + model_do = "select" + + # Handle model selection/combination mode + if "C" in [error_type, trend_type, season_type]: + model_do = "combine" + # Replace C with Z for actual fitting + if error_type == "C": + error_type = "Z" + if trend_type == "C": + trend_type = "Z" + if season_type == "C": + season_type = "Z" + elif any(c in ["Z", "X", "Y", "F", "P"] for c in [error_type, trend_type, season_type]): + model_do = "select" + + # Handle multiplicative restrictions + if not allow_multiplicative: + if error_type == "M": + error_type = "A" + if trend_type == "M": + trend_type = "A" + if season_type == "M": + season_type = "A" + if error_type == "Y": + error_type = "X" + if trend_type == "Y": + trend_type = "X" + if season_type == "Y": + season_type = "X" + + # Generate models pool for selection/combination if needed + if model_do in ["select", "combine"]: + models_pool = _generate_models_pool(error_type, trend_type, season_type, + allow_multiplicative, silent) + + return { + "error_type": error_type, + "trend_type": trend_type, + "season_type": season_type, + "damped": damped, + "model_do": model_do, + "models_pool": models_pool + } + +def _generate_models_pool(error_type, trend_type, season_type, allow_multiplicative, silent=False): + """Generate pool of candidate models based on components.""" + pool = [] + + # Handle full pool case ("FFF") + if "F" in [error_type, trend_type, season_type]: + pool = ["ANN", "AAN", "AAdN", "AMN", "AMdN", + "ANA", "AAA", "AAdA", "AMA", "AMdA", + "ANM", "AAM", "AAdM", "AMM", "AMdM"] + if allow_multiplicative: + pool.extend([ + "MNN", "MAN", "MAdN", "MMN", "MMdN", + "MNA", "MAA", "MAdA", "MMA", "MMdA", + "MNM", "MAM", "MAdM", "MMM", "MMdM" + ]) + + # Handle pure models case ("PPP") + elif "P" in [error_type, trend_type, season_type]: + pool = ["ANN", "AAN", "AAdN", "ANA", "AAA", "AAdA"] + if allow_multiplicative: + pool.extend(["MNN", "MMN", "MMdN", "MNM", "MMM", "MMdM"]) + + # Handle standard selection case + else: + # Generate based on provided components + error_options = ["A", "M"] if allow_multiplicative else ["A"] + trend_options = ["N", "A", "Ad"] if trend_type != "N" else ["N"] + season_options = ["N", "A"] if season_type != "N" else ["N"] + + if error_type in ["A", "M"]: + error_options = [error_type] + if trend_type in ["A", "M", "N"]: + trend_options = [trend_type] + if season_type in ["A", "M", "N"]: + season_options = [season_type] + + for e in error_options: + for t in trend_options: + for s in season_options: + if "d" in t: + pool.append(f"{e}{t[0]}d{s}") + else: + pool.append(f"{e}{t}{s}") + + return sorted(list(set(pool))) + +def _check_ets_model(model, distribution, data, silent=False): + """ + Full check for ETS logic. Return dictionary with error_type, trend_type, season_type, damped, ets_model... + If error_type='N', skip ETS model. 
+ """ + + # Convert DataFrame/Series to numeric values if needed + if hasattr(data, 'values'): + data_values = data.values.flatten() if hasattr(data, 'flatten') else data.values + else: + data_values = data + + + # Check multiplicative components first + data_min = min(data_values) if hasattr(data, 'values') else min(data) + allow_multiplicative = True + if data_min <= 0 and distribution in ["dlnorm", "dgamma", "dinvgauss", "dllaplace", "dlgnorm", "dls"]: + allow_multiplicative = False + + # Now call _check_model_composition with all required arguments + model_info = _check_model_composition(model, allow_multiplicative, silent) + + # Extract components + error_type = model_info["error_type"] + trend_type = model_info["trend_type"] + season_type = model_info["season_type"] + damped = model_info["damped"] + + # If error_type='N', that means no ETS + if error_type == "N": + trend_type = "N" + season_type = "N" + damped = False + return { + "ets_model": False, + "error_type": "N", + "trend_type": "N", + "season_type": "N", + "damped": False, + "allow_multiplicative": False, + "model": "NNN" + } + + # Warn about multiplicative components if needed + if not allow_multiplicative and not silent and any(x == "M" for x in (error_type, trend_type, season_type)): + _warn("Your data contains non-positive values, so the ETS might break for multiplicative components.") + + # Construct final model string + if len(model) == 4: + model = f"{error_type}{trend_type}d{season_type}" + else: + model = f"{error_type}{trend_type}{season_type}" + + return { + "ets_model": True, + "model": model, + "error_type": error_type, + "trend_type": trend_type, + "season_type": season_type, + "damped": damped, + "allow_multiplicative": allow_multiplicative + } + +def _expand_orders(orders): + """Tiny helper to convert orders to lists: (ar, i, ma).""" + if isinstance(orders, dict): + ar_orders = orders.get("ar", []) + i_orders = orders.get("i", []) + ma_orders = orders.get("ma", []) + elif isinstance(orders, (list, tuple)) and len(orders) == 3: + ar_orders, i_orders, ma_orders = orders + else: + ar_orders, i_orders, ma_orders = 0, 0, 0 + if not isinstance(ar_orders, (list, tuple)): + ar_orders = [ar_orders] + if not isinstance(i_orders, (list, tuple)): + i_orders = [i_orders] + if not isinstance(ma_orders, (list, tuple)): + ma_orders = [ma_orders] + return ar_orders, i_orders, ma_orders + +def _check_arima(orders, validated_lags, silent=False): + """ + Validate ARIMA portion and return complete ARIMA information. 
+
+ Args:
+ orders: ARIMA orders (can be dict, list/tuple, or single values)
+ validated_lags: List of validated lags
+ silent: Whether to suppress warnings
+
+ Returns:
+ dict: Complete ARIMA information including all components
+ """
+ ar_orders, i_orders, ma_orders = _expand_orders(orders)
+
+ # Sum orders to determine if ARIMA is needed
+ s_ar = sum(ar_orders)
+ s_i = sum(i_orders)
+ s_ma = sum(ma_orders)
+ arima_model = (s_ar + s_i + s_ma) > 0
+
+ # Initialize result dictionary
+ result = {
+ "arima_model": arima_model,
+ "ar_orders": ar_orders,
+ "i_orders": i_orders,
+ "ma_orders": ma_orders,
+ "ar_required": False,
+ "i_required": False,
+ "ma_required": False,
+ "ar_estimate": False,
+ "ma_estimate": False,
+ "arma_parameters": None,
+ "non_zero_ari": [],
+ "non_zero_ma": [],
+ "lags_model_arima": [],
+ "select": False
+ }
+
+ if not arima_model:
+ return result
+
+ # Set required flags based on orders
+ result["ar_required"] = (s_ar > 0)
+ result["i_required"] = (s_i > 0)
+ result["ma_required"] = (s_ma > 0)
+
+ # Set estimation flags - if component is required, it needs to be estimated
+ result["ar_estimate"] = result["ar_required"]
+ result["ma_estimate"] = result["ma_required"]
+
+ # Create ariValues and maValues lists
+ ari_values = []
+ ma_values = []
+ for i, lag in enumerate(validated_lags):
+ # AR and I orders combined
+ ari = [0] # Start with 0
+ if ar_orders[i] > 0:
+ ari.extend(range(min(1, ar_orders[i]), ar_orders[i] + 1))
+ if i_orders[i] > 0:
+ ari.extend(range(ar_orders[i] + 1, ar_orders[i] + i_orders[i] + 1))
+ ari_values.append([x * lag for x in set(ari)])
+
+ # MA orders
+ ma = [0] # Start with 0
+ if ma_orders[i] > 0:
+ ma.extend(range(min(1, ma_orders[i]), ma_orders[i] + 1))
+ ma_values.append([x * lag for x in set(ma)])
+
+ # Get non-zero values
+ result["non_zero_ari"] = sorted(set([x for sublist in ari_values for x in sublist if x != 0]))
+ result["non_zero_ma"] = sorted(set([x for sublist in ma_values for x in sublist if x != 0]))
+
+ # Combine and sort unique lags for ARIMA components
+ result["lags_model_arima"] = sorted(set(result["non_zero_ari"] + result["non_zero_ma"]))
+
+ # Initialize ARMA parameters if needed
+ if result["ar_required"] or result["ma_required"]:
+ result["arma_parameters"] = []
+ # Add AR parameters
+ if result["ar_required"]:
+ for lag in validated_lags:
+ for i in range(max(ar_orders)):
+ result["arma_parameters"].append({
+ "name": f"phi{i+1}[{lag}]",
+ "value": None
+ })
+ # Add MA parameters
+ if result["ma_required"]:
+ for lag in validated_lags:
+ for i in range(max(ma_orders)):
+ result["arma_parameters"].append({
+ "name": f"theta{i+1}[{lag}]",
+ "value": None
+ })
+
+ # Warning for high frequency data
+ if max(validated_lags) >= 24 and not silent:
+ _warn("The estimation of ARIMA model with large lags might take time. Consider initial='backcasting' or simpler orders.", silent)
+
+ return result
+
+def _check_distribution_loss(distribution, loss, silent=False):
+ """Check distribution and loss from a known set, falling back to defaults if invalid."""
+ valid_distributions = {
+ "default","dnorm","dlaplace","dalaplace","ds","dgnorm",
+ "dlnorm","dinvgauss","dgamma"
+ }
+ if distribution not in valid_distributions:
+ _warn(f"distribution '{distribution}' not recognized. Using 'default'.", silent)
+ distribution = "default"
+
+ valid_losses = {
+ "likelihood","MSE","MAE","HAM","LASSO","RIDGE",
+ "MSEh","TMSE","GTMSE","MSCE","MAEh","TMAE","GTMAE","MACE",
+ "HAMh","THAM","GTHAM","CHAM","GPL",
+ "aMSEh","aTMSE","aGTMSE","aMSCE","aGPL","custom"
+ }
+ if loss not in valid_losses:
+ _warn(f"loss '{loss}' is not recognized. Using 'likelihood'.", silent)
+ loss = "likelihood"
+
+ return {"distribution": distribution, "loss": loss}
+
+def _check_outliers(outliers_mode, silent=False):
+ """Ensure outliers mode is one of 'ignore','use','select'."""
+ choices = ["ignore","use","select"]
+ if outliers_mode not in choices:
+ _warn(f"outliers='{outliers_mode}' not recognized. Switching to 'ignore'.", silent)
+ outliers_mode = "ignore"
+ return outliers_mode
+
+def _check_phi(phi, damped, silent=False):
+ """
+ If damped is True, phi must lie in (0, 2); invalid or missing values
+ fall back to 0.95 and are estimated.
+ If the user doesn't supply phi => default to 0.95 if damped, else 1.
+ """
+ if damped:
+ if phi is None:
+ phi_val = 0.95
+ phi_est = True
+ else:
+ # numeric?
+ try:
+ p = float(phi)
+ if p <= 0 or p >= 2:
+ _warn("Damping parameter should lie in (0,2). Changing to estimate with initial=0.95", silent)
+ phi_val = 0.95
+ phi_est = True
+ else:
+ phi_val = p
+ phi_est = False
+ except (TypeError, ValueError):
+ _warn("Provided value of phi is invalid. Using 0.95 as a guess, will estimate it.", silent)
+ phi_val = 0.95
+ phi_est = True
+ else:
+ # not damped => phi=1
+ phi_val = 1.0
+ phi_est = False
+
+ return {"phi": phi_val, "phi_estimate": phi_est}
+
+def _check_persistence(persistence, ets_model, trend_type, season_type, lags_model_seasonal, xreg_model=False, silent=False):
+ """
+ Check persistence parameters and return dictionary with all persistence-related variables.
+ Mirrors R code's persistence handling.
+
+ Args:
+ persistence: The persistence parameter (can be None, numeric, list, or dict)
+ ets_model: Boolean indicating if ETS model is used
+ trend_type: The trend type ('N', 'A', 'M' etc)
+ season_type: The seasonal type ('N', 'A', 'M' etc)
+ lags_model_seasonal: List of seasonal lags
+ xreg_model: Boolean indicating if explanatory variables are used
+ silent: Whether to suppress warnings
+
+ Returns:
+ dict: Dictionary containing all persistence-related variables
+ """
+ n_seasonal = len(lags_model_seasonal) if lags_model_seasonal else 0
+
+ result = {
+ "persistence": None,
+ "persistence_estimate": True,
+ "persistence_level": None,
+ "persistence_level_estimate": True,
+ "persistence_trend": None,
+ "persistence_trend_estimate": True,
+ "persistence_seasonal": [None] * n_seasonal,
+ "persistence_seasonal_estimate": [True] * n_seasonal,
+ "persistence_xreg": None,
+ "persistence_xreg_estimate": True,
+ "persistence_xreg_provided": False
+ }
+
+ # If no persistence provided, return defaults
+ if persistence is None:
+ return result
+
+ # Handle different persistence input types
+ if isinstance(persistence, dict):
+ # Named dictionary case (similar to R's named list)
+ if "level" in persistence or "alpha" in persistence:
+ result["persistence_level"] = persistence.get("level", persistence.get("alpha"))
+ result["persistence_level_estimate"] = False
+
+ if "trend" in persistence or "beta" in persistence:
+ result["persistence_trend"] = persistence.get("trend", persistence.get("beta"))
+ result["persistence_trend_estimate"] = False
+
+ if "seasonal" in persistence or "gamma" in persistence:
+ seasonal_value = persistence.get("seasonal", persistence.get("gamma"))
+ if isinstance(seasonal_value, (list, tuple)):
+ # Set ALL components based on length match
+ all_estimate = len(seasonal_value) != len(lags_model_seasonal)
+ result["persistence_seasonal_estimate"] = [all_estimate] * n_seasonal
+ if not all_estimate:
+ result["persistence_seasonal"] = list(seasonal_value)
+ else:
+ # Non-list value provided - set ALL to False
+ result["persistence_seasonal_estimate"] = [False] * n_seasonal
+ result["persistence_seasonal"][0] = seasonal_value
+
+ if "xreg" in persistence or "delta" in persistence:
+ result["persistence_xreg"] = persistence.get("xreg", persistence.get("delta"))
+ result["persistence_xreg_estimate"] = False
+ result["persistence_xreg_provided"] = True
+
+ elif isinstance(persistence, (list, tuple)):
+ # List/tuple case
+ if len(persistence) > 0 and persistence[0] is not None:
+ result["persistence_level"] = persistence[0]
+ result["persistence_level_estimate"] = False
+
+ if len(persistence) > 1 and persistence[1] is not None:
+ result["persistence_trend"] = persistence[1]
+ result["persistence_trend_estimate"] = False
+
+ if len(persistence) > 2 and persistence[2] is not None:
+ result["persistence_seasonal"] = persistence[2]
+ # Keep this a list so that downstream all()/any() checks work
+ result["persistence_seasonal_estimate"] = [False] * n_seasonal
+
+ if len(persistence) > 3 and persistence[3] is not None:
+ result["persistence_xreg"] = persistence[3]
+ result["persistence_xreg_estimate"] = False
+ result["persistence_xreg_provided"] = True
+
+ elif isinstance(persistence, (int, float)):
+ # Single numeric value case
+ result["persistence_level"] = float(persistence)
+ result["persistence_level_estimate"] = False
+
+ else:
+ _warn("Persistence is not numeric/list/dict. We'll estimate it instead.", silent)
+ return result
+
+ # Update main persistence estimate flag based on component estimates
+ result["persistence_estimate"] = any([
+ result["persistence_level_estimate"] and ets_model,
+ result["persistence_trend_estimate"] and trend_type != "N",
+ any(result["persistence_seasonal_estimate"]) and season_type != "N",
+ result["persistence_xreg_estimate"] and xreg_model
+ ])
+
+ # Make sure only relevant components are estimated
+ if not ets_model:
+ result["persistence_level_estimate"] = False
+ result["persistence_level"] = None
+
+ if trend_type == "N":
+ result["persistence_trend_estimate"] = False
+ result["persistence_trend"] = None
+
+ if season_type == "N":
+ result["persistence_seasonal_estimate"] = False
+ result["persistence_seasonal"] = None
+
+ if not xreg_model:
+ result["persistence_xreg_estimate"] = False
+ result["persistence_xreg"] = None
+ result["persistence_xreg_provided"] = False
+
+ return result
+
+def _check_initial(initial, ets_model, trend_type, season_type, arima_model=False, xreg_model=False, silent=False):
+ """
+ Check initial parameters and return dictionary with all initial-related variables.
+ Mirrors R code's initial handling.
+
+ Args:
+ initial: The initial parameter (can be None, numeric, list/tuple, or dict)
+ ets_model: Boolean indicating if ETS model is used
+ trend_type: The trend type ('N', 'A', 'M' etc)
+ season_type: The seasonal type ('N', 'A', 'M' etc)
+ arima_model: Boolean indicating if ARIMA model is used
+ xreg_model: Boolean indicating if explanatory variables are used
+ silent: Whether to suppress warnings
+
+ Returns:
+ dict: Dictionary containing all initial-related variables
+ """
+ result = {
+ "initial": initial, # Store original value
+ "initial_type": "optimal",
+ "initial_estimate": True,
+ "initial_level": None,
+ "initial_level_estimate": True,
+ "initial_trend": None,
+ "initial_trend_estimate": True,
+ "initial_seasonal": None,
+ "initial_seasonal_estimate": True,
+ "initial_arima": None,
+ "initial_arima_estimate": True,
+ "initial_arima_number": 0, # Will be set properly if ARIMA model is used
+ "initial_xreg_estimate": True,
+ "initial_xreg_provided": False
+ }
+
+ # If no initial provided, return defaults with optimal type
+ if initial is None:
+ if not silent:
+ print("Initial value is not selected. Switching to optimal.")
+ return result
+
+ # Handle string types
+ if isinstance(initial, str):
+ valid_types = ["optimal", "backcasting", "complete", "provided"]
+ if initial not in valid_types:
+ _warn(f"Initial '{initial}' not recognized. Using 'optimal'.", silent)
+ return result
+
+ result["initial_type"] = initial
+ # Set estimate flags based on type
+ is_estimate = (initial != "provided" and initial != "complete")
+ result["initial_estimate"] = is_estimate
+ result["initial_level_estimate"] = is_estimate
+ result["initial_trend_estimate"] = is_estimate
+ result["initial_seasonal_estimate"] = is_estimate
+ result["initial_arima_estimate"] = is_estimate
+ result["initial_xreg_estimate"] = is_estimate
+ return result
+
+ # Handle dictionary case (similar to R's named list)
+ if isinstance(initial, dict):
+ result["initial_type"] = "provided"
+ result["initial_estimate"] = False
+
+ if "level" in initial:
+ result["initial_level"] = initial["level"]
+ result["initial_level_estimate"] = False
+
+ if "trend" in initial:
+ result["initial_trend"] = initial["trend"]
+ result["initial_trend_estimate"] = False
+
+ if "seasonal" in initial:
+ result["initial_seasonal"] = initial["seasonal"]
+ result["initial_seasonal_estimate"] = False
+
+ if "arima" in initial:
+ result["initial_arima"] = initial["arima"]
+ result["initial_arima_estimate"] = False
+
+ if "xreg" in initial:
+ result["initial_xreg_provided"] = True
+ result["initial_xreg_estimate"] = False
+
+ # Handle numeric or list/tuple case
+ elif isinstance(initial, (int, float, list, tuple)):
+ result["initial_type"] = "provided"
+ result["initial_estimate"] = False
+
+ # Convert to list for consistent handling
+ init_values = [initial] if isinstance(initial, (int, float)) else list(initial)
+
+ # Assign values based on position
+ if len(init_values) > 0:
+ result["initial_level"] = init_values[0]
+ result["initial_level_estimate"] = False
+
+ if len(init_values) > 1 and trend_type != "N":
+ result["initial_trend"] = init_values[1]
+ result["initial_trend_estimate"] = False
+
+ if len(init_values) > 2 and season_type != "N":
+ result["initial_seasonal"] = init_values[2]
+ result["initial_seasonal_estimate"] = False
+
+ if len(init_values) > 3 and arima_model:
+ result["initial_arima"] = init_values[3]
+ result["initial_arima_estimate"] = False
+
+ else:
+ _warn("Initial vector is not numeric! Using optimal initialization.", silent)
+ return result
+
+ # Make sure only relevant components are estimated
+ if not ets_model:
+ result["initial_level_estimate"] = False
+ result["initial_level"] = None
+
+ if trend_type == "N":
+ result["initial_trend_estimate"] = False
+ result["initial_trend"] = None
+
+ if season_type == "N":
+ result["initial_seasonal_estimate"] = False
+ result["initial_seasonal"] = None
+
+ if not arima_model:
+ result["initial_arima_estimate"] = False
+ result["initial_arima"] = None
+ result["initial_arima_number"] = 0
+
+ if not xreg_model:
+ result["initial_xreg_estimate"] = False
+ result["initial_xreg_provided"] = False
+
+ return result
+
+def _check_constant(constant, silent=False):
+ """
+ Mirrors the R code: numeric => use it as drift/constant, logical => estimate or not.
+ """
+ if isinstance(constant, bool):
+ return {
+ "constant_required": constant,
+ "constant_estimate": constant,
+ "constant_value": None
+ }
+ if isinstance(constant, (float,int)):
+ return {
+ "constant_required": True,
+ "constant_estimate": False,
+ "constant_value": float(constant)
+ }
+ _warn(f"The parameter 'constant' can only be bool or numeric. Found '{constant}'. Switching to False.", silent)
+ return {
+ "constant_required": False,
+ "constant_estimate": False,
+ "constant_value": None
+ }
+
+def _initialize_estimation_params(
+ loss,
+ lambda_param,
+ ets_info,
+ arima_info,
+ silent=False
+):
+ """
+ Initialize estimation parameters, particularly for LASSO/RIDGE cases.
+ This mirrors the R code's initialization logic in the model_do="estimate" section.
+
+ Args:
+ loss (str): Loss function type
+ lambda_param (float): Lambda parameter for LASSO/RIDGE
+ ets_info (dict): ETS model information
+ arima_info (dict): ARIMA model information
+ silent (bool): Whether to suppress warnings
+
+ Returns:
+ dict: Dictionary containing initialized parameters and lambda value
+ """
+ # Only proceed with special initialization if LASSO/RIDGE with lambda=1
+ if loss not in ["LASSO", "RIDGE"] or lambda_param != 1:
+ return {
+ "lambda": lambda_param,
+ "persistence_params": None,
+ "arma_params": None
+ }
+
+ result = {
+ "lambda": 0 # Set lambda to 0 for initial estimation
+ }
+
+ # Initialize persistence parameters if ETS model
+ if ets_info["ets_model"]:
+ persistence_params = {
+ "estimate": False,
+ "level_estimate": False,
+ "trend_estimate": False,
+ "seasonal_estimate": False,
+ "level": 0,
+ "trend": 0,
+ "seasonal": 0,
+ "phi": 1,
+ "phi_estimate": False
+ }
+ result["persistence_params"] = persistence_params
+
+ # Initialize ARMA parameters if ARIMA model
+ if arima_info["arima_model"]:
+ ar_orders = arima_info["ar_orders"]
+ ma_orders = arima_info["ma_orders"]
+ # NOTE: _check_arima does not currently return a "lags" key, so this falls back to [1]
+ lags = sorted(set([1] + (arima_info.get("lags", []) or []))) # Ensure we have lags
+
+ # Initialize ARMA parameters
+ arma_params = {
+ "ar_estimate": False,
+ "ma_estimate": False,
+ "parameters": []
+ }
+
+ # Build ARMA parameters list with proper naming
+ for lag in lags:
+ # Add AR parameters (all set to 1)
+ ar_count = ar_orders[0] if ar_orders else 0 # Simplified - might need adjustment
+ for i in range(ar_count):
+ arma_params["parameters"].append({
+ "name": f"phi{i+1}[{lag}]",
+ "value": 1
+ })
+
+ # Add MA parameters (all set to 0)
+ ma_count = ma_orders[0] if ma_orders else 0 # Simplified - might need adjustment
+ for i in range(ma_count):
+ arma_params["parameters"].append({
+ "name": f"theta{i+1}[{lag}]",
+ "value": 0
+ })
+
+ result["arma_params"] = arma_params
+
+ return result
+
+def _organize_model_type_info(ets_info, arima_info, xreg_model=False):
+ """
+ Organize basic model type information into a structured dictionary.
+
+ Args:
+ ets_info (dict): ETS model information
+ arima_info (dict): ARIMA model information
+ xreg_model (bool): Whether explanatory variables are used
+
+ Returns:
+ dict: Model type information
+ """
+ return {
+ "ets_model": ets_info["ets_model"],
+ "arima_model": arima_info["arima_model"],
+ "xreg_model": xreg_model,
+ "model": ets_info["model"],
+ "error_type": ets_info["error_type"],
+ "trend_type": ets_info["trend_type"],
+ "season_type": ets_info["season_type"],
+ "damped": ets_info["damped"],
+ "allow_multiplicative": ets_info["allow_multiplicative"],
+ "model_is_trendy": ets_info["trend_type"] != "N",
+ "model_is_seasonal": ets_info["season_type"] != "N",
+ "model_do": "estimate", # default, can be overridden
+ "models_pool": ets_info.get("models_pool", None)
+ }
+
+def _organize_components_info(ets_info, arima_info, lags_model_seasonal):
+ """
+ Organize components information into a structured dictionary.
+ + Args: + ets_info (dict): ETS model information + arima_info (dict): ARIMA model information + lags_model_seasonal (list): List of seasonal lags + + Returns: + dict: Components information + """ + # Calculate ETS components + components_number_ets = sum([ + ets_info["ets_model"], # level component if ETS + ets_info["trend_type"] != "N", # trend component if present + ets_info["season_type"] != "N" # seasonal component if present + ]) + + components_names_ets = [ + name for include, name in [ + (ets_info["ets_model"], "level"), + (ets_info["trend_type"] != "N", "trend"), + (ets_info["season_type"] != "N", "seasonal") + ] if include + ] + + # Calculate seasonal components + components_number_ets_seasonal = int(ets_info["season_type"] != "N") + components_number_ets_non_seasonal = components_number_ets - components_number_ets_seasonal + + return { + # ETS components + "components_number_ets": components_number_ets, + "components_names_ets": components_names_ets, + "components_number_ets_seasonal": components_number_ets_seasonal, + "components_number_ets_non_seasonal": components_number_ets_non_seasonal, + + # ARIMA components + "components_number_arima": len(arima_info.get("lags_model_arima", [])), + "components_names_arima": [f"ARIMAState{i+1}" for i in range(len(arima_info.get("lags_model_arima", [])))], + + # Seasonal info + "lags_model_seasonal": lags_model_seasonal, + + # Total components + "components_number_total": (components_number_ets + + len(arima_info.get("lags_model_arima", []))) + } + +def _organize_lags_info(validated_lags, lags_model, lags_model_seasonal, lags_model_arima, xreg_model=False): + """Organize lags information into a dictionary""" + # Calculate all model lags (ETS + ARIMA + X) + if xreg_model: + # If xreg exists, add ones for each xreg variable + lags_model_all = sorted(set(lags_model + lags_model_arima + [1])) + else: + lags_model_all = sorted(set(lags_model + (lags_model_arima if lags_model_arima else []))) + + return { + "lags": validated_lags, + "lags_model": lags_model, + "lags_model_seasonal": lags_model_seasonal, + "lags_model_arima": lags_model_arima, + "lags_model_all": lags_model_all, + "max_lag": max(lags_model_all) if lags_model_all else 1, + "lags_length": len(validated_lags) + } + +def _organize_occurrence_info(occurrence, occurrence_model, obs_in_sample, h=0): + """ + Organize occurrence information into a structured dictionary. 
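+ For example, with the default occurrence and no occurrence model
+ (illustrative; 24 in-sample points and a 12-step horizon):
+
+ >>> info = _organize_occurrence_info("none", False, 24, h=12)
+ >>> info["p_fitted"].shape
+ (24, 1)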
+ + Args: + occurrence (str): Occurrence type + occurrence_model (bool): Whether occurrence model is used + obs_in_sample (int): Number of observations in sample + h (int): Forecast horizon + + Returns: + dict: Occurrence information + """ + # Initialize with default values matching R code + p_fitted = np.ones((obs_in_sample, 1)) # matrix(1, obsInSample, 1) in R + p_forecast = np.array([np.nan] * h) # rep(NA,h) in R + oes_model = None + occurrence_model_provided = False + + # Handle occurrence model object case (is.occurrence in R) + if hasattr(occurrence, 'occurrence'): # equivalent to is.occurrence(occurrence) + oes_model = occurrence + occurrence = oes_model.occurrence + if occurrence == "provided": + occurrence_model_provided = False + else: + occurrence_model_provided = True + p_fitted = np.matrix(oes_model.fitted).reshape(obs_in_sample, 1) + + # Handle numeric/logical occurrence + if isinstance(occurrence, (bool, np.bool_)): + occurrence = int(occurrence) + + if isinstance(occurrence, (int, float, np.number)): + if all(occurrence == 1): + occurrence = "none" + else: + # Check bounds + if any(o < 0 or o > 1 for o in np.atleast_1d(occurrence)): + _warn("Parameter 'occurrence' should contain values between zero and one.\nConverting to appropriate vector.") + occurrence = (occurrence != 0).astype(int) + + # Set pFitted from occurrence values + p_fitted[:] = occurrence[:obs_in_sample] + + # Handle forecast values + if h > 0: + if len(occurrence) > obs_in_sample: + p_forecast = occurrence[obs_in_sample:] + else: + p_forecast = np.repeat(occurrence[-1], h) + + # Adjust forecast length + if len(p_forecast) > h: + p_forecast = p_forecast[:h] + elif len(p_forecast) < h: + p_forecast = np.append(p_forecast, np.repeat(p_forecast[-1], h - len(p_forecast))) + else: + p_forecast = np.array([np.nan]) + + occurrence = "provided" + oes_model = {"fitted": p_fitted, "forecast": p_forecast, "occurrence": "provided"} + + return { + "occurrence": occurrence, + "occurrence_model": occurrence_model, + "occurrence_model_provided": occurrence_model_provided, + "p_fitted": p_fitted, + "p_forecast": p_forecast, + "oes_model": oes_model + } + +def _organize_phi_info(phi_val, phi_estimate): + """ + Organize phi information into a structured dictionary. + + Args: + phi_val (float): Phi value + phi_estimate (bool): Whether phi should be estimated + + Returns: + dict: Phi information + """ + return { + "phi": phi_val, + "phi_estimate": phi_estimate + } + +################### +# MAIN ENTRYPOINT # +################### + +def parameters_checker( + data, + model, + lags, + orders = None, + constant=False, + outliers="ignore", + level=0.99, + persistence=None, + phi=None, + initial=None, + distribution="default", + loss="likelihood", + h=0, + holdout=False, + occurrence="none", + ic="AICc", + bounds="usual", + silent=False, + model_do="estimate", + fast=False, + models_pool=None, + lambda_param=None, + frequency=None +): + """ + Extended parameters_checker that includes initialization logic for estimation. 
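+
+ Example (illustrative; the call returns a 13-tuple of dictionaries which
+ the downstream architector/creator functions consume):
+
+ >>> y = pd.Series(range(1, 25),
+ ... index=pd.date_range("2023-01-31", periods=24, freq="M"))
+ >>> (general, observations_dict, persistence_results, initials_results,
+ ... arima_results, constant_dict, model_type_dict, components_dict,
+ ... lags_dict, occurrence_dict, phi_dict, xreg_dict,
+ ... params_info) = parameters_checker(y, model="ANN", lags=[12],
+ ... h=12, silent=True)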
+ """ + # Extract values if DataFrame/Series and ensure numeric + if hasattr(data, 'values'): + data_values = data.values + if isinstance(data_values, np.ndarray): + data_values = data_values.flatten() + # Convert to numeric if needed + data_values = pd.to_numeric(data_values, errors='coerce') + else: + # Convert to numeric if needed + try: + data_values = pd.to_numeric(data, errors='coerce') + except: + raise ValueError("Data must be numeric or convertible to numeric values") + + ##################### + # 1) Occurrence + ##################### + occ_info = _check_occurrence(data_values, occurrence, silent) + obs_in_sample = occ_info["obs_in_sample"] + obs_nonzero = occ_info["obs_nonzero"] + occurrence_model = occ_info["occurrence_model"] + + ##################### + # 2) Check Lags + ##################### + lags_info = _check_lags(lags, obs_in_sample, silent) + validated_lags = lags_info["lags"] + lags_model = lags_info["lags_model"] + lags_model_seasonal = lags_info["lags_model_seasonal"] + lags_length = lags_info["lags_length"] + max_lag = lags_info["max_lag"] + + ##################### + # 3) Check ETS Model + ##################### + ets_info = _check_ets_model(model, distribution, data, silent) + ets_model = ets_info["ets_model"] # boolean + model_str = ets_info["model"] + error_type = ets_info["error_type"] + trend_type = ets_info["trend_type"] + season_type = ets_info["season_type"] + damped = ets_info["damped"] + allow_mult = ets_info["allow_multiplicative"] + + ##################### + # 4) ARIMA checks + ##################### + arima_info = _check_arima(orders, validated_lags, silent) + arima_model = arima_info["arima_model"] + ar_orders = arima_info["ar_orders"] + i_orders = arima_info["i_orders"] + ma_orders = arima_info["ma_orders"] + lags_model_arima = arima_info["lags_model_arima"] + non_zero_ari = arima_info["non_zero_ari"] + non_zero_ma = arima_info["non_zero_ma"] + + ##################### + # 5) Dist & Loss + ##################### + dist_info = _check_distribution_loss(distribution, loss, silent) + distribution = dist_info["distribution"] + loss = dist_info["loss"] + + ##################### + # 6) Outliers + ##################### + outliers_mode = _check_outliers(outliers, silent) + + ##################### + # 7) Persistence + ##################### + persist_info = _check_persistence( + persistence=persistence, + ets_model=ets_model, + trend_type=trend_type, + season_type=season_type, + lags_model_seasonal=lags_model_seasonal, + xreg_model=False, # You'll need to add xreg handling logic + silent=silent + ) + + ##################### + # 8) Initial + ##################### + init_info = _check_initial( + initial=initial, + ets_model=ets_model, + trend_type=trend_type, + season_type=season_type, + arima_model=arima_model, + xreg_model=False, # You'll need to add xreg handling logic + silent=silent + ) + + # Create initials dictionary + initials_results = { + "initial": init_info["initial"], + "initial_type": init_info["initial_type"], + "initial_estimate": init_info["initial_estimate"], + "initial_level": init_info["initial_level"], + "initial_level_estimate": init_info["initial_level_estimate"], + "initial_trend": init_info["initial_trend"], + "initial_trend_estimate": init_info["initial_trend_estimate"], + "initial_seasonal": init_info["initial_seasonal"], + "initial_seasonal_estimate": init_info["initial_seasonal_estimate"], + "initial_arima": init_info["initial_arima"], + "initial_arima_estimate": init_info["initial_arima_estimate"], + "initial_arima_number": 
init_info["initial_arima_number"], + "initial_xreg_estimate": init_info["initial_xreg_estimate"], + "initial_xreg_provided": init_info["initial_xreg_provided"] + } + + ##################### + # 9) Constant + ##################### + constant_dict = _check_constant(constant, silent) + + ##################### + # 9.1) Check phi + ##################### + phi_info = _check_phi(phi, damped, silent) + phi_val = phi_info["phi"] + phi_estimate = phi_info["phi_estimate"] + + ##################### + # 10) Validate Bounds + ##################### + if bounds not in ["usual","admissible","none"]: + _warn(f"Unknown bounds='{bounds}'. Switching to 'usual'.", silent) + bounds = "usual" + + ##################### + # 11) holdout logic + ##################### + if holdout and h <= 0: + _warn("holdout=TRUE but horizon 'h' is not positive. No real holdout can be made.", silent) + + ##################### + # 12) Model selection fallback + ##################### + # The R code tries to reduce the model complexity if obs_nonzero < #params, etc. + # We'll do a simplified fallback if sample is too small. This is a partial approach. + # (In R code, there's extensive logic around "if(etsModel && obsNonzero <= nParamMax) {...}") + # We'll do a simpler approach: + if ets_model and (obs_nonzero < 3): + # Switch to ANN or do-nothing approach + _warn("Not enough of non-zero observations for a complicated ETS model. Switching to 'ANN'.", silent) + error_type, trend_type, season_type = "A", "N", "N" + damped = False + phi_val = 1.0 + model_str = "ANN" + # We might do more checks, but keep it short here. + + ##################### + # 13) Model pooling + ##################### + # If model_do in ["select","combine"] and we have a models_pool, ensure it's valid. + # The original R code has logic to unify model & models_pool. We'll only store it here. + if model_do in ["select","combine"] and models_pool is not None: + # The user can specify e.g. ["ANN","MNN"] etc. We won't do deep validation here. + pass + + # Check if multiplicative models are allowed (using data_values instead of data) + allow_multiplicative = not ((any(y <= 0 for y in data_values if not pd.isna(y)) and not occurrence_model) or + (occurrence_model and any(y < 0 for y in data_values if not pd.isna(y)))) + + # Check model composition + model_info = _check_model_composition(model, allow_multiplicative, silent) + final_model_do = model_info["model_do"] + candidate_pool = model_info["models_pool"] + + if final_model_do in ["select", "combine"]: + # This replicates R's auto selection or combination + # (in R, it enumerates, fits each, then picks or combines) + fitted_results = [] + for candidate in candidate_pool: + # parse submodel + sub_error_type = candidate[0] + sub_trend_type = candidate[1] + sub_season_type = candidate[-1] + sub_damped = ('d' in candidate[2:-1]) if len(candidate) == 4 else False + + # Fit the submodel here (omitted for brevity)... + # e.g. sub_fit = _fit_submodel(data, sub_error_type, sub_trend_type, sub_season_type, sub_damped, ...) 
+ # Then store results + fitted_results.append((candidate, None)) # placeholder + + if final_model_do == "select": + # In R, you'd pick best by IC or something; let's pick first for the example + best_model = candidate_pool[0] + else: + # "combine": in R, you'd average forecasts from all or use weights + best_model = candidate_pool[0] # placeholder + + # Overwrite final model with the chosen "best_model" or a combined approach + error_type = best_model[0] + trend_type = best_model[1] + season_type = best_model[-1] + damped = ('d' in best_model[2:-1]) if len(best_model) == 4 else False + if damped and trend_type != 'N': + final_model_str = f"{error_type}{trend_type}d{season_type}" + else: + final_model_str = f"{error_type}{trend_type}{season_type}" + else: + # Normal single model + error_type = model_info["error_type"] + trend_type = model_info["trend_type"] + season_type = model_info["season_type"] + damped = model_info["damped"] + if damped and trend_type != 'N': + final_model_str = f"{error_type}{trend_type}d{season_type}" + else: + final_model_str = f"{error_type}{trend_type}{season_type}" + + # ... continue with ARIMA checks, etc. Pass final_model_str onward ... + # finalize the return or proceed + # ... rest of the existing function ... + + # Create lags dictionary + lags_dict = _organize_lags_info( + validated_lags=validated_lags, + lags_model=lags_model, + lags_model_seasonal=lags_model_seasonal, + lags_model_arima=lags_model_arima, + xreg_model=False # Update this when xreg is implemented + ) + + # Create occurrence dictionary + occurrence_dict = _organize_occurrence_info( + occurrence=occ_info["occurrence"], + occurrence_model=occurrence_model, + obs_in_sample=obs_in_sample, + h=h + ) + + # Create phi dictionary + phi_dict = _organize_phi_info( + phi_val=phi_val, + phi_estimate=phi_estimate + ) + + # Main results dictionary - remove occurrence and phi info + # Create observations dictionary + ot_info = _calculate_ot_logical(data, occurrence_dict["occurrence"], + occurrence_dict["occurrence_model"], + obs_in_sample, + frequency, h, holdout) + observations_dict = { + "obs_in_sample": obs_in_sample, + "obs_nonzero": obs_nonzero, + "obs_all": obs_in_sample + (1 - holdout) * h, + "ot_logical": ot_info["ot_logical"], + "ot": ot_info["ot"], + "y_in_sample": ot_info.get("y_in_sample", data), # Use split data if available + "y_holdout": ot_info.get("y_holdout", None), # Add holdout data + "frequency": ot_info["frequency"], + "y_start": ot_info["y_start"], + "y_in_sample_index": ot_info.get("y_in_sample_index", None), # Add the index to observations_dict + "y_forecast_start": ot_info["y_forecast_start"] # Make sure this is here too + } + + # Create general dictionary with remaining parameters + general = { + "distribution": distribution, + "loss": loss, + "outliers": outliers_mode, + "h": h, + "holdout": holdout, + "ic": ic, + "bounds": bounds, + "model_do": model_do, + "fast": fast, + "models_pool": models_pool + } + + ##################### + # Initialize Estimation + ##################### + if model_do == "estimate": + init_params = _initialize_estimation_params( + loss=loss, + lambda_param=lambda_param or 1, # Default to 1 if not provided + ets_info=ets_info, + arima_info=arima_info, + silent=silent + ) + # Update results with initialization parameters + general.update({ + "lambda": init_params["lambda"], + "persistence_params": init_params.get("persistence_params"), + "arma_params": init_params.get("arma_params") + }) + + # Persistence-specific dictionary + persistence_results = { + 
"persistence": persist_info["persistence"], + "persistence_estimate": persist_info["persistence_estimate"], + "persistence_level": persist_info["persistence_level"], + "persistence_level_estimate": persist_info["persistence_level_estimate"], + "persistence_trend": persist_info["persistence_trend"], + "persistence_trend_estimate": persist_info["persistence_trend_estimate"], + "persistence_seasonal": persist_info["persistence_seasonal"], + "persistence_seasonal_estimate": persist_info["persistence_seasonal_estimate"], + "persistence_xreg": persist_info["persistence_xreg"], + "persistence_xreg_estimate": persist_info["persistence_xreg_estimate"], + "persistence_xreg_provided": persist_info["persistence_xreg_provided"] + } + + # ARIMA-specific dictionary + arima_results = { + "arima_model": arima_model, + "ar_orders": ar_orders, + "i_orders": i_orders, + "ma_orders": ma_orders, + "ar_required": arima_info.get("ar_required", False), + "i_required": arima_info.get("i_required", False), + "ma_required": arima_info.get("ma_required", False), + "ar_estimate": arima_info.get("ar_estimate", False), + "ma_estimate": arima_info.get("ma_estimate", False), + "arma_parameters": arima_info.get("arma_parameters", None), + "non_zero_ari": non_zero_ari, + "non_zero_ma": non_zero_ma, + "select": arima_info.get("select", False) + } + + # Create model type dictionary + model_type_dict = _organize_model_type_info(ets_info, arima_info, xreg_model=False) + + # Create components dictionary + components_dict = _organize_components_info(ets_info, arima_info, lags_model_seasonal) + + # Update model_do based on input parameter + model_type_dict["model_do"] = model_do + + + # Initiliaze the explonatory dict -> will not be used for now + xreg_dict = { + "xreg_model": False, + "regressors": None, + "xreg_model_initials": None, + "xreg_data": None, + "xreg_number": 0, + "xreg_names": None, + "response_name": None, + "formula": None, + "xreg_parameters_missing": None, + "xreg_parameters_included": None, + "xreg_parameters_estimated": None, + "xreg_parameters_persistence": None + } + + # Calculate parameters number + # Calculate parameters number + params_info = _calculate_parameters_number( + ets_info=ets_info, + arima_info=arima_info, + xreg_info=None, # Add xreg handling if needed + constant_required=constant_dict["constant_required"] + ) + + # Return all dictionaries including new lags_dict + return (general, + observations_dict, + persistence_results, + initials_results, + arima_results, + constant_dict, + model_type_dict, + components_dict, + lags_dict, + occurrence_dict, + phi_dict, + xreg_dict, + params_info) + +# Calculate otLogical based on the R code logic +def _calculate_ot_logical(data, occurrence, occurrence_model, obs_in_sample, frequency=None, h=0, holdout=False): + """Calculate otLogical vector and ot based on occurrence type and data + + Args: + data: Input time series data + occurrence (str): Occurrence type + occurrence_model (bool): Whether occurrence model is used + obs_in_sample (int): Number of in-sample observations + frequency (int, optional): Time series frequency. If None, will be inferred. 
+ h (int): Forecast horizon + holdout (bool): Whether to create holdout set + + Returns: + dict: Dictionary containing ot_logical, ot, frequency, y_start and y_holdout + """ + # Convert data to numpy array if needed + if hasattr(data, 'values'): + y_in_sample = data.values.flatten() if hasattr(data.values, 'flatten') else data.values + else: + y_in_sample = np.asarray(data).flatten() + + # Handle holdout if requested and possible + y_holdout = None + if holdout and h > 0 and len(y_in_sample) > h: + # Split the data + y_holdout = y_in_sample[-h:] + y_in_sample = y_in_sample[:-h] + + # Initial calculation - data != 0 + ot_logical = y_in_sample != 0 + + # If occurrence is "none" and all values are non-zero, set all to True + if occurrence == "none" and all(ot_logical): + ot_logical = np.ones_like(ot_logical, dtype=bool) + + # If occurrence model is not used and occurrence is not "provided" + if not occurrence_model and occurrence != "provided": + ot_logical = np.ones_like(ot_logical, dtype=bool) + + # Use provided frequency if available, otherwise infer it + if frequency is not None: + freq = str(frequency) + else: + # Default frequency + freq = "1" # Default string frequency + + # Try to infer frequency from data if it's a pandas Series/DataFrame + if hasattr(data, 'index'): + # Try to get frequency from index + if hasattr(data.index, 'freq') and data.index.freq is not None: + # Get the actual frequency string + freq = data.index.freq + if not freq: # If empty string, fallback to default + freq = "1" + + # Get start time from index if available + y_start = 0 # default + if hasattr(data, 'index') and len(data.index) > 0: + y_start = data.index[0] + + # Create ot based on otLogical + # Check if data is a time series (has frequency attribute) + if hasattr(data, 'freq') or hasattr(data, 'index'): + # For time series data, create time series ot + freq_pd = getattr(data, 'freq', None) or getattr(data.index, 'freq', None) + if hasattr(data.index[0], 'to_timestamp'): + start = data.index[0].to_timestamp() + else: + start = data.index[0] + + # Ensure ot_logical is 1-dimensional before creating Series + ot = pd.Series(ot_logical.ravel().astype(int), + index=pd.date_range(start=start, periods=len(ot_logical), freq=freq_pd)) + else: + # For non-time series data, create simple array + ot = ot_logical.ravel().astype(int) + + # Get the index if available + y_in_sample_index = None + if hasattr(data, 'index'): + y_in_sample_index = data.index[:-h] if holdout else data.index + + # Get forecast start based on index + if holdout: + y_forecast_start = data.index[obs_in_sample] if hasattr(data, 'index') else obs_in_sample + 1 + else: + if hasattr(data, 'index'): + # Get the last observation and add one time unit + y_forecast_start = data.index[obs_in_sample-1] + (data.index[obs_in_sample-1] - data.index[obs_in_sample-2]) + else: + y_forecast_start = obs_in_sample + + return { + "ot_logical": ot_logical, + "ot": ot, + "frequency": freq, + "y_start": y_start, + "y_holdout": y_holdout, + "y_in_sample_index": y_in_sample_index, + "y_forecast_start": y_forecast_start + } + +def _adjust_model_for_sample_size( + model_info, + obs_nonzero, + lags_model_max, + allow_multiplicative=True, + xreg_number=0, + silent=False +): + """ + Adjust model selection based on sample size, matching R's logic. 
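+
+ For instance, with only three non-zero observations a "ZZZ" request
+ collapses to a minimal selection pool (illustrative minimal input):
+
+ >>> info = {"error_type": "Z", "trend_type": "Z", "season_type": "Z",
+ ... "model_do": "select", "models_pool": None}
+ >>> _adjust_model_for_sample_size(info, 3, 12)["models_pool"]
+ ['ANN', 'MNN']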
+ """ + error_type = model_info["error_type"] + trend_type = model_info["trend_type"] + season_type = model_info["season_type"] + model_do = model_info["model_do"] + models_pool = model_info["models_pool"] + + n_param_exo = xreg_number * 2 # For both initial and persistence + + # If sample is too small for current model + if obs_nonzero <= 3 + n_param_exo: + if obs_nonzero == 3: + if error_type in ["A", "M"]: + model_do = "estimate" + trend_type = season_type = "N" + else: + models_pool = ["ANN"] + if allow_multiplicative: + models_pool.append("MNN") + model_do = "select" + error_type = trend_type = season_type = "N" + + return { + "error_type": error_type, + "trend_type": trend_type, + "season_type": season_type, + "model_do": model_do, + "models_pool": models_pool, + "persistence_estimate": False, + "persistence_level": 0 + } + + elif obs_nonzero == 2: + return { + "error_type": "A", + "trend_type": "N", + "season_type": "N", + "model_do": "use", + "models_pool": None, + "persistence_estimate": False, + "persistence_level": 0, + "initial_estimate": False + } + + elif obs_nonzero == 1: + return { + "error_type": "A", + "trend_type": "N", + "season_type": "N", + "model_do": "use", + "models_pool": None, + "persistence_estimate": False, + "persistence_level": 0, + "initial_estimate": False + } + + # Handle larger but still limited samples + if obs_nonzero <= 5 + n_param_exo: + trend_type = "N" + if len(model_info.get("model", "")) == 4: + model = f"{error_type}N{season_type}" + + if obs_nonzero <= 2 * lags_model_max: + season_type = "N" + if models_pool: + models_pool = [m for m in models_pool if m[-1] == "N"] + + return { + "error_type": error_type, + "trend_type": trend_type, + "season_type": season_type, + "model_do": model_do, + "models_pool": models_pool + } + +def _calculate_parameters_number(ets_info, arima_info, xreg_info=None, constant_required=False): + """Calculate number of parameters for different model components. 
+ + Returns a 2x1 array-like structure similar to R's parametersNumber matrix: + - Row 1: Number of states/components + - Row 2: Number of parameters to estimate + """ + # Initialize parameters number matrix (2x1) + parameters_number = [[0], [0]] # Mimics R's matrix(0,2,1) + + # Count states (first row) + if ets_info["ets_model"]: + # Add level component + parameters_number[0][0] += 1 + # Add trend if present + if ets_info["trend_type"] != "N": + parameters_number[0][0] += 1 + # Add seasonal if present + if ets_info["season_type"] != "N": + parameters_number[0][0] += 1 + + # Count parameters to estimate (second row) + if ets_info["ets_model"]: + # Level persistence + parameters_number[1][0] += 1 + # Trend persistence if present + if ets_info["trend_type"] != "N": + parameters_number[1][0] += 1 + # Additional parameter for damped trend + if ets_info["damped"]: + parameters_number[1][0] += 1 + # Seasonal persistence if present + if ets_info["season_type"] != "N": + parameters_number[1][0] += 1 + + # Add ARIMA parameters if present + if arima_info["arima_model"]: + # Add number of ARMA parameters + parameters_number[1][0] += len(arima_info.get("arma_parameters", [])) + + # Add constant if required + if constant_required: + parameters_number[1][0] += 1 + + # Handle pure constant model case (no ETS, no ARIMA, no xreg) + if not ets_info["ets_model"] and not arima_info["arima_model"] and not xreg_info: + parameters_number[0][0] = 0 + parameters_number[1][0] = 2 # Matches R code line 3047 + + return { + "parameters_number": parameters_number, + "n_states": parameters_number[0][0], + "n_params": parameters_number[1][0] + } diff --git a/python/smooth/adam_general/core/creator.py b/python/smooth/adam_general/core/creator.py index 0ca97be1..66bafbe5 100644 --- a/python/smooth/adam_general/core/creator.py +++ b/python/smooth/adam_general/core/creator.py @@ -1,87 +1,132 @@ import numpy as np -from typing import List, Optional, Dict, Any -from python.smooth.adam_general.core.utils.utils import measurement_inverter, scaler, msdecompose, calculate_acf, calculate_pacf, calculate_likelihood, calculate_entropy, calculate_multistep_loss +from typing import List, Optional, Dict, Any, Union +from core.utils.utils import ( + measurement_inverter, scaler, msdecompose, calculate_acf, + calculate_pacf, calculate_likelihood, calculate_entropy, + calculate_multistep_loss +) +from core.utils.polynomials import adam_polynomialiser from scipy import stats from scipy.linalg import eigvals from scipy.optimize import minimize -from python.smooth.adam_general.core.utils.cost_functions import logLikReturn import pandas as pd -def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal, - lags, lags_model, lags_model_arima, lags_model_all, lags_model_max, - profiles_recent_table, profiles_recent_provided, - obs_states, obs_in_sample, obs_all, components_number_ets, components_number_ets_seasonal, - components_names_ets, ot_logical, y_in_sample, - # Persistence and phi - persistence=None, persistence_estimate=True, - persistence_level=None, persistence_level_estimate=True, - persistence_trend=None, persistence_trend_estimate=True, - persistence_seasonal=None, persistence_seasonal_estimate=True, - persistence_xreg=None, persistence_xreg_estimate=True, persistence_xreg_provided=False, - phi=1, - # Initials - initial_type="optimal", initial_estimate=True, - initial_level=None, initial_level_estimate=True, - initial_trend=None, initial_trend_estimate=True, - initial_seasonal=None, 
initial_seasonal_estimate=True, - initial_arima=None, initial_arima_estimate=True, initial_arima_number=None, - initial_xreg_estimate=True, initial_xreg_provided=False, - # ARIMA elements - arima_model=False, ar_estimate=True, i_required=False, ma_estimate=True, arma_parameters=None, - ar_orders=None, i_orders=None, ma_orders=None, ar_required=False, ma_required=False, - non_zero_ari=None, non_zero_ma=None, - components_number_arima=0, components_names_arima=None, - # Explanatory variables - xreg_model=False, xreg_model_initials=None, xreg_data=None, xreg_number=0, xreg_names=None, - xreg_parameters_persistence=None, - # Constant - constant_required=False, constant_estimate=True, constant_value=None, constant_name=None): +def creator( + # Model type info + model_type_dict, + + # Lags info + lags_dict, + + # Profiles + profiles_dict, + + # Observation info + observations_dict, + + # Parameter dictionaries + persistence_checked, + initials_checked, + arima_checked, + constants_checked, + + phi_dict, + + # Components info + components_dict, + + explanatory_checked = None, + ): + """ + Creates the model matrices for ADAM. + + Args: + model_type_dict: Dictionary containing model type information + lags_dict: Dictionary containing lags information + profiles_dict: Dictionary containing profiles information + observations_dict: Dictionary containing observation information + persistence_checked: Dictionary of persistence parameters + initials_checked: Dictionary of initial values + arima_checked: Dictionary of ARIMA parameters + constants_checked: Dictionary of constant parameters + explanatory_checked: Dictionary of explanatory variables parameters + phi_dict: Dictionary containing phi parameters + components_dict: Dictionary containing component information + """ + + # Extract observation values + obs_states = observations_dict["obs_states"] + obs_in_sample = observations_dict["obs_in_sample"] + obs_all = observations_dict["obs_all"] + ot_logical = observations_dict["ot_logical"] + y_in_sample = observations_dict["y_in_sample"] + obs_nonzero = observations_dict['obs_nonzero'] + # Extract values from dictionaries + ets_model = model_type_dict["ets_model"] + e_type = model_type_dict["error_type"] + t_type = model_type_dict["trend_type"] + s_type = model_type_dict["season_type"] + model_is_trendy = model_type_dict["model_is_trendy"] + model_is_seasonal = model_type_dict["model_is_seasonal"] + + components_number_ets = components_dict["components_number_ets"] + components_number_ets_seasonal = components_dict["components_number_ets_seasonal"] + components_number_arima = components_dict.get("components_number_arima", 0) + + phi = phi_dict["phi"] + + lags = lags_dict["lags"] + lags_model = lags_dict["lags_model"] + lags_model_arima = lags_dict["lags_model_arima"] + lags_model_all = lags_dict["lags_model_all"] + lags_model_max = lags_dict["lags_model_max"] + + profiles_recent_table = profiles_dict["profiles_recent_table"] + profiles_recent_provided = profiles_dict["profiles_recent_provided"] # Matrix of states. 
Time in columns, components in rows - mat_vt = np.full((components_number_ets + components_number_arima + xreg_number + constant_required, obs_states), np.nan) + mat_vt = np.full((components_number_ets + components_number_arima + + explanatory_checked['xreg_number'] + constants_checked['constant_required'], + obs_states), np.nan) # Measurement rowvector - mat_wt = np.ones((obs_all, components_number_ets + components_number_arima + xreg_number + constant_required)) + mat_wt = np.ones((obs_all, components_number_ets + components_number_arima + + explanatory_checked['xreg_number'] + constants_checked['constant_required'])) # If xreg are provided, then fill in the respective values in Wt vector - if xreg_model: - mat_wt[:, components_number_ets + components_number_arima:components_number_ets + components_number_arima + xreg_number] = xreg_data + if explanatory_checked['xreg_model']: + mat_wt[:, components_number_ets + components_number_arima: + components_number_ets + components_number_arima + explanatory_checked['xreg_number']] = \ + explanatory_checked['xreg_data'] # Transition matrix - mat_f = np.eye(components_number_ets + components_number_arima + xreg_number + constant_required) + mat_f = np.eye(components_number_ets + components_number_arima + explanatory_checked['xreg_number'] + constants_checked['constant_required']) # Persistence vector - vec_g = np.zeros((components_number_ets + components_number_arima + xreg_number + constant_required, 1)) + vec_g = np.zeros((components_number_ets + components_number_arima + explanatory_checked['xreg_number'] + constants_checked['constant_required'], 1)) #vec_g_index = components_names_ets + components_names_arima + xreg_names + constant_name - obs_nonzero = np.sum(y_in_sample != 0) + j = 0 # ETS model, persistence if ets_model: j += 1 - #vec_g_index[j-1] = "alpha" - if not persistence_level_estimate: - vec_g[j-1, 0] = persistence_level + if not persistence_checked['persistence_level_estimate']: + vec_g[j-1, 0] = persistence_checked['persistence_level'] if model_is_trendy: j += 1 - #vec_g_index[j-1] = "beta" - if not persistence_trend_estimate: - vec_g[j-1, 0] = persistence_trend + if not persistence_checked['persistence_trend_estimate']: + vec_g[j-1, 0] = persistence_checked['persistence_trend'] if model_is_seasonal: - if not all(persistence_seasonal_estimate): - vec_g[j + np.where(np.logical_not(persistence_seasonal_estimate))[0], 0] = persistence_seasonal - - #if components_number_ets_seasonal > 1: - #vec_g_index[j:j+components_number_ets_seasonal] = [f"gamma{i}" for i in range(1, components_number_ets_seasonal+1)] - # else: - # vec_g_index[j] = "gamma" + if not all(persistence_checked['persistence_seasonal_estimate']): + vec_g[j + np.where(np.logical_not(persistence_checked['persistence_seasonal_estimate']))[0], 0] = persistence_checked['persistence_seasonal'] # ARIMA model, names for persistence - if arima_model: + if arima_checked['arima_model']: # Remove diagonal from the ARIMA part of the matrix mat_f[j:j+components_number_arima, j:j+components_number_arima] = 0 #if components_number_arima > 1: @@ -91,14 +136,13 @@ def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasona j += components_number_arima # Modify transition to do drift - if not arima_model and constant_required: + if not arima_checked['arima_model'] and constants_checked['constant_required']: mat_f[0, -1] = 1 # Regression, persistence - if xreg_model: - if persistence_xreg_provided and not persistence_xreg_estimate: - vec_g[j:j+xreg_number, 0] = 
     # Regression, persistence
-    if xreg_model:
-        if persistence_xreg_provided and not persistence_xreg_estimate:
-            vec_g[j:j+xreg_number, 0] = persistence_xreg
-        #vec_g_index[j:j+xreg_number] = [f"delta{i}" for i in xreg_parameters_persistence]
+    if explanatory_checked['xreg_model']:
+        if persistence_checked['persistence_xreg_provided'] and not persistence_checked['persistence_xreg_estimate']:
+            vec_g[j:j+explanatory_checked['xreg_number'], 0] = persistence_checked['persistence_xreg']
 
     # Damping parameter value
     if ets_model and model_is_trendy:
@@ -107,24 +151,27 @@ def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasona
         mat_wt[:, 1] = phi
 
     # If the arma parameters were provided, fill in the persistence
-    if arima_model and (not ar_estimate and not ma_estimate):
+    if arima_checked['arima_model'] and (not arima_checked['ar_estimate'] and not arima_checked['ma_estimate']):
         # Call polynomial
         arima_polynomials = {key: np.array(value) for key, value in adam_polynomialiser(
-            0, ar_orders, i_orders, ma_orders,
-            ar_estimate, ma_estimate, arma_parameters, lags
+            0, arima_checked['ar_orders'], arima_checked['i_orders'], arima_checked['ma_orders'],
+            arima_checked['ar_estimate'], arima_checked['ma_estimate'], arima_checked['arma_parameters'], lags
         ).items()}
 
         # Fill in the transition matrix
-        if non_zero_ari.shape[0] > 0:
+        if len(arima_checked['non_zero_ari']) > 0:
+            non_zero_ari = np.array(arima_checked['non_zero_ari'])
             mat_f[components_number_ets + non_zero_ari[:, 1], components_number_ets + non_zero_ari[:, 1]] = \
                 -arima_polynomials['ari_polynomial'][non_zero_ari[:, 0]]
 
         # Fill in the persistence vector
-        if non_zero_ari.shape[0] > 0:
+        if len(arima_checked['non_zero_ari']) > 0:
+            non_zero_ari = np.array(arima_checked['non_zero_ari'])
             vec_g[components_number_ets + non_zero_ari[:, 1], 0] = \
                 -arima_polynomials['ari_polynomial'][non_zero_ari[:, 0]]
-        if non_zero_ma.shape[0] > 0:
+        if len(arima_checked['non_zero_ma']) > 0:
+            non_zero_ma = np.array(arima_checked['non_zero_ma'])
             vec_g[components_number_ets + non_zero_ma[:, 1], 0] += \
                 arima_polynomials['ma_polynomial'][non_zero_ma[:, 0]]
     else:
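The step above writes the (negated) ARI polynomial and the MA polynomial coefficients into the transition matrix and persistence vector. adam_polynomialiser itself is not shown in this patch, so the following is only an assumption about the algebra it performs, illustrated for ARIMA(1,1,1):

    import numpy as np

    # ARI polynomial = AR polynomial times differencing polynomial:
    # (1 - phi1*B)(1 - B) = 1 - (1 + phi1)*B + phi1*B^2
    phi1, theta1 = 0.5, -0.3
    ari_polynomial = np.convolve([1, -phi1], [1, -1])  # -> [1.0, -1.5, 0.5]
    ma_polynomial = np.array([1, theta1])              # (1 + theta1*B)
    # The non-zero lag coefficients (sign-flipped for ARI) are the values
    # placed into mat_f and vec_g above.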
@@ -134,28 +181,29 @@ def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasona
 
     # ETS model, initial state
     # If something needs to be estimated...
     if ets_model:
-        if initial_estimate:
+        if initials_checked['initial_estimate']:
             # For the seasonal models
             if model_is_seasonal:
+            if obs_nonzero >= lags_model_max * 2:
                 # If either e_type or s_type are multiplicative, do multiplicative decomposition
                 decomposition_type = "multiplicative" if any(x == "M" for x in [e_type, s_type]) else "additive"
-                y_decomposition = msdecompose(y_in_sample, [lag for lag in lags if lag != 1], type=decomposition_type)
+                y_decomposition = msdecompose(y_in_sample.values.ravel(), [lag for lag in lags if lag != 1], type=decomposition_type)
                 j = 0
                 # level
-                if initial_level_estimate:
+                if initials_checked['initial_level_estimate']:
                     mat_vt[j, 0:lags_model_max] = y_decomposition['initial'][0]
-                    if xreg_model:
+                    if explanatory_checked['xreg_model']:
                         if e_type == "A":
-                            mat_vt[j, 0:lags_model_max] -= np.dot(xreg_model_initials[0]['initial_xreg'], xreg_data[0])
+                            mat_vt[j, 0:lags_model_max] -= np.dot(explanatory_checked['xreg_model_initials'][0]['initial_xreg'], explanatory_checked['xreg_data'][0])
                         else:
-                            mat_vt[j, 0:lags_model_max] /= np.exp(np.dot(xreg_model_initials[1]['initial_xreg'], xreg_data[0]))
+                            mat_vt[j, 0:lags_model_max] /= np.exp(np.dot(explanatory_checked['xreg_model_initials'][1]['initial_xreg'], explanatory_checked['xreg_data'][0]))
                 else:
-                    mat_vt[j, 0:lags_model_max] = initial_level
+                    mat_vt[j, 0:lags_model_max] = initials_checked['initial_level']
                 j += 1
 
                 # If trend is needed
                 if model_is_trendy:
-                    if initial_trend_estimate:
+                    if initials_checked['initial_trend_estimate']:
                         if t_type == "A" and s_type == "M":
                             mat_vt[j, 0:lags_model_max] = np.prod(y_decomposition['initial']) - y_decomposition['initial'][0]
                             # If the initial trend is higher than the lowest value, initialise with zero.
@@ -178,14 +226,14 @@ def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasona
                         if t_type == "M" and np.any(mat_vt[0, 0:lags_model_max] < 0):
                             mat_vt[0, 0:lags_model_max] = y_in_sample[ot_logical][0]
                     else:
-                        mat_vt[j, 0:lags_model_max] = initial_trend
+                        mat_vt[j, 0:lags_model_max] = initials_checked['initial_trend']
                     j += 1
 
                 # Seasonal components
                 # For pure models use stuff as is
                 if all(x == "A" for x in [e_type, s_type]) or all(x == "M" for x in [e_type, s_type]) or (e_type == "A" and s_type == "M"):
                     for i in range(components_number_ets_seasonal):
-                        if initial_seasonal_estimate[i]:
+                        if initials_checked['initial_seasonal_estimate']:
                             mat_vt[i+j-1, 0:lags_model[i+j-1]] = y_decomposition['seasonal'][i]
                             # Renormalise the initial seasons
                             if s_type == "A":
@@ -195,12 +243,12 @@ def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasona
                                 mat_vt[i+j-1, 0:lags_model[i+j-1]] /= \
                                     np.exp(np.mean(np.log(mat_vt[i+j-1, 0:lags_model[i+j-1]])))
                         else:
-                            mat_vt[i+j-1, 0:lags_model[i+j-1]] = initial_seasonal[i]
+                            mat_vt[i+j-1, 0:lags_model[i+j-1]] = initials_checked['initial_seasonal'][i]
 
                 # For mixed models use a different set of initials
                 elif e_type == "M" and s_type == "A":
                     for i in range(components_number_ets_seasonal):
-                        if initial_seasonal_estimate[i]:
+                        if initials_checked['initial_seasonal_estimate']:
                             mat_vt[i+j-1, 0:lags_model[i+j-1]] = np.log(y_decomposition['seasonal'][i]) * min(y_in_sample[ot_logical])
                             # Renormalise the initial seasons
                             if s_type == "A":
@@ -208,28 +256,28 @@ def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasona
                             else:
                                 mat_vt[i+j-1, 0:lags_model[i+j-1]] /= np.exp(np.mean(np.log(mat_vt[i+j-1, 0:lags_model[i+j-1]])))
                         else:
-                            mat_vt[i+j-1, 0:lags_model[i+j-1]] = initial_seasonal[i]
+                            mat_vt[i+j-1, 0:lags_model[i+j-1]] = initials_checked['initial_seasonal'][i]
             else:
                 # If either e_type or s_type are multiplicative, do multiplicative decomposition
                 j = 0
                 # level
-                if initial_level_estimate:
+                if initials_checked['initial_level_estimate']:
                     mat_vt[j, 0:lags_model_max] = np.mean(y_in_sample[0:lags_model_max])
-                    if xreg_model:
+                    if explanatory_checked['xreg_model']:
                         if e_type == "A":
-                            mat_vt[j, 0:lags_model_max] -= np.dot(xreg_model_initials[0]['initial_xreg'], xreg_data[0])
+                            mat_vt[j, 0:lags_model_max] -= np.dot(explanatory_checked['xreg_model_initials'][0]['initial_xreg'], explanatory_checked['xreg_data'][0])
                         else:
-                            mat_vt[j, 0:lags_model_max] /= np.exp(np.dot(xreg_model_initials[1]['initial_xreg'], xreg_data[0]))
+                            mat_vt[j, 0:lags_model_max] /= np.exp(np.dot(explanatory_checked['xreg_model_initials'][1]['initial_xreg'], explanatory_checked['xreg_data'][0]))
                 else:
-                    mat_vt[j, 0:lags_model_max] = initial_level
+                    mat_vt[j, 0:lags_model_max] = initials_checked['initial_level']
                 j += 1
 
                 if model_is_trendy:
-                    if initial_trend_estimate:
+                    if initials_checked['initial_trend_estimate']:
                         if t_type == "A":
                             # trend
                             mat_vt[j, 0:lags_model_max] = y_in_sample[1] - y_in_sample[0]
                         elif t_type == "M":
-                            if initial_level_estimate:
+                            if initials_checked['initial_level_estimate']:
                                 # level fix
                                 mat_vt[j-1, 0:lags_model_max] = np.exp(np.mean(np.log(y_in_sample[ot_logical][0:lags_model_max])))
                             # trend
@@ -238,7 +286,7 @@ def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasona
                         if t_type == "M" and np.any(mat_vt[j, 0:lags_model_max] > 1.1):
                             mat_vt[j, 0:lags_model_max] = 1
                     else:
-                        mat_vt[j, 0:lags_model_max] = initial_trend
+                        mat_vt[j, 0:lags_model_max] = initials_checked['initial_trend']
 
                     # Do roll back. Especially useful for backcasting and multisteps
                     if t_type == "A":
@@ -250,59 +298,58 @@ def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasona
                 # Seasonal components
                 if s_type == "A":
                     for i in range(components_number_ets_seasonal):
-                        if initial_seasonal_estimate[i]:
+                        if initials_checked['initial_seasonal_estimate']:
                             mat_vt[i+j-1, 0:lags_model[i+j-1]] = y_in_sample[0:lags_model[i+j-1]] - mat_vt[0, 0]
                             # Renormalise the initial seasons
                             mat_vt[i+j-1, 0:lags_model[i+j-1]] -= np.mean(mat_vt[i+j-1, 0:lags_model[i+j-1]])
                         else:
-                            mat_vt[i+j-1, 0:lags_model[i+j-1]] = initial_seasonal[i]
+                            mat_vt[i+j-1, 0:lags_model[i+j-1]] = initials_checked['initial_seasonal'][i]
                 # For mixed models use a different set of initials
                 else:
                     for i in range(components_number_ets_seasonal):
-                        if initial_seasonal_estimate[i]:
+                        if initials_checked['initial_seasonal_estimate']:
                             # abs() is needed for mixed ETS+ARIMA
                             mat_vt[i+j-1, 0:lags_model[i+j-1]] = y_in_sample[0:lags_model[i+j-1]] / abs(mat_vt[0, 0])
                             # Renormalise the initial seasons
                             mat_vt[i+j-1, 0:lags_model[i+j-1]] /= np.exp(np.mean(np.log(mat_vt[i+j-1, 0:lags_model[i+j-1]])))
                         else:
-                            mat_vt[i+j-1, 0:lags_model[i+j-1]] = initial_seasonal[i]
+                            mat_vt[i+j-1, 0:lags_model[i+j-1]] = initials_checked['initial_seasonal'][i]
             else:
                 # Non-seasonal models
                 # level
-                if initial_level_estimate:
-                    mat_vt[0, 0:lags_model_max] = np.mean(y_in_sample[0:max(lags_model_max, int(obs_in_sample * 0.2))])
+                if initials_checked['initial_level_estimate']:
+                    mat_vt[0, 0:lags_model_max] = np.mean(y_in_sample[:max(lags_model_max, int(np.ceil(obs_in_sample * 0.2)))])
                 else:
-                    mat_vt[0, 0:lags_model_max] = initial_level
-
+                    mat_vt[0, 0:lags_model_max] = initials_checked['initial_level']
                 if model_is_trendy:
-                    if initial_trend_estimate:
+                    if initials_checked['initial_trend_estimate']:
                         if t_type == "A":
                             mat_vt[1, 0:lags_model_max] = np.mean(np.diff(y_in_sample[0:max(lags_model_max + 1, int(obs_in_sample * 0.2))]))
                         else:  # t_type == "M"
                             mat_vt[1, 0:lags_model_max] = np.exp(np.mean(np.diff(np.log(y_in_sample[ot_logical]))))
                     else:
-                        mat_vt[1, 0:lags_model_max] = initial_trend
+                        mat_vt[1, 0:lags_model_max] = initials_checked['initial_trend']
 
-                if initial_level_estimate and e_type == "M" and mat_vt[0, lags_model_max-1] == 0:
+                if initials_checked['initial_level_estimate'] and e_type == "M" and mat_vt[0, lags_model_max-1] == 0:
                     mat_vt[0, 0:lags_model_max] = np.mean(y_in_sample)
 
         # Else, insert the provided ones... make sure that this is not a backcasting
-        elif not initial_estimate and initial_type == "provided":
+        elif not initials_checked['initial_estimate'] and initials_checked['initial_type'] == "provided":
             j = 0
-            mat_vt[j, 0:lags_model_max] = initial_level
+            mat_vt[j, 0:lags_model_max] = initials_checked['initial_level']
             if model_is_trendy:
                 j += 1
-                mat_vt[j, 0:lags_model_max] = initial_trend
+                mat_vt[j, 0:lags_model_max] = initials_checked['initial_trend']
             if model_is_seasonal:
                 for i in range(components_number_ets_seasonal):
                     # This is misaligned, but that's okay, because this goes directly to profile_recent
-                    mat_vt[j+i, 0:lags_model[j+i]] = initial_seasonal[i]
+                    mat_vt[j+i, 0:lags_model[j+i]] = initials_checked['initial_seasonal'][i]
             j += components_number_ets_seasonal
 
     # If ARIMA orders are specified, prepare initials
-    if arima_model:
-        if initial_arima_estimate:
-            mat_vt[components_number_ets:components_number_ets+components_number_arima, 0:initial_arima_number] = 0 if e_type == "A" else 1
+    if arima_checked['arima_model']:
+        if initials_checked['initial_arima_estimate']:
+            mat_vt[components_number_ets:components_number_ets+components_number_arima, 0:initials_checked['initial_arima_number']] = 0 if e_type == "A" else 1
 
             if any(lag > 1 for lag in lags):
                 y_decomposition = msdecompose(y_in_sample, [lag for lag in lags if lag != 1],
@@ -310,103 +357,110 @@ def creator(ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasona
             else:
                 y_decomposition = np.mean(np.diff(y_in_sample[ot_logical])) if e_type == "A" else np.exp(np.mean(np.diff(np.log(y_in_sample[ot_logical]))))
-            mat_vt[components_number_ets+components_number_arima-1, 0:initial_arima_number] = \
-                np.tile(y_decomposition, int(np.ceil(initial_arima_number / max(lags))))[:initial_arima_number]
+            mat_vt[components_number_ets+components_number_arima-1, 0:initials_checked['initial_arima_number']] = \
+                np.tile(y_decomposition, int(np.ceil(initials_checked['initial_arima_number'] / max(lags))))[:initials_checked['initial_arima_number']]
         else:
-            mat_vt[components_number_ets:components_number_ets+components_number_arima, 0:initial_arima_number] = 0 if e_type == "A" else 1
-            mat_vt[components_number_ets+components_number_arima-1, 0:initial_arima_number] = initial_arima[:initial_arima_number]
+            mat_vt[components_number_ets:components_number_ets+components_number_arima, 0:initials_checked['initial_arima_number']] = 0 if e_type == "A" else 1
+            mat_vt[components_number_ets+components_number_arima-1, 0:initials_checked['initial_arima_number']] = initials_checked['initial_arima'][:initials_checked['initial_arima_number']]
 
     # Fill in the initials for xreg
-    if xreg_model:
-        if e_type == "A" or initial_xreg_provided or xreg_model_initials[1] is None:
-            mat_vt[components_number_ets+components_number_arima:components_number_ets+components_number_arima+xreg_number, 0:lags_model_max] = \
-                xreg_model_initials[0]['initial_xreg']
+    if explanatory_checked['xreg_model']:
+        if e_type == "A" or initials_checked['initial_xreg_provided'] or explanatory_checked['xreg_model_initials'][1] is None:
+            mat_vt[components_number_ets+components_number_arima:components_number_ets+components_number_arima+explanatory_checked['xreg_number'], 0:lags_model_max] = \
+                explanatory_checked['xreg_model_initials'][0]['initial_xreg']
         else:
-            mat_vt[components_number_ets+components_number_arima:components_number_ets+components_number_arima+xreg_number, 0:lags_model_max] = \
-                xreg_model_initials[1]['initial_xreg']
+            mat_vt[components_number_ets+components_number_arima:components_number_ets+components_number_arima+explanatory_checked['xreg_number'], 0:lags_model_max] = \
+                explanatory_checked['xreg_model_initials'][1]['initial_xreg']
 
     # Add constant if needed
-    if constant_required:
-        if constant_estimate:
+    if constants_checked['constant_required']:
+        if constants_checked['constant_estimate']:
             # Add the mean of data
-            if sum(i_orders) == 0 and not ets_model:
-                mat_vt[components_number_ets+components_number_arima+xreg_number, :] = np.mean(y_in_sample[ot_logical])
+            if sum(arima_checked['i_orders']) == 0 and not ets_model:
+                mat_vt[components_number_ets+components_number_arima+explanatory_checked['xreg_number'], :] = np.mean(y_in_sample[ot_logical])
             # Add first differences
             else:
                 if e_type == "A":
-                    mat_vt[components_number_ets+components_number_arima+xreg_number, :] = np.mean(np.diff(y_in_sample[ot_logical]))
+                    mat_vt[components_number_ets+components_number_arima+explanatory_checked['xreg_number'], :] = np.mean(np.diff(y_in_sample[ot_logical]))
                 else:
-                    mat_vt[components_number_ets+components_number_arima+xreg_number, :] = np.exp(np.mean(np.diff(np.log(y_in_sample[ot_logical]))))
+                    mat_vt[components_number_ets+components_number_arima+explanatory_checked['xreg_number'], :] = np.exp(np.mean(np.diff(np.log(y_in_sample[ot_logical]))))
         else:
-            mat_vt[components_number_ets+components_number_arima+xreg_number, :] = constant_value
+            mat_vt[components_number_ets+components_number_arima+explanatory_checked['xreg_number'], :] = constants_checked['constant_value']
 
         # If ETS model is used, change the initial level
-        if ets_model and initial_level_estimate:
+        if ets_model and initials_checked['initial_level_estimate']:
             if e_type == "A":
-                mat_vt[0, 0:lags_model_max] -= mat_vt[components_number_ets+components_number_arima+xreg_number, 0]
+                mat_vt[0, 0:lags_model_max] -= mat_vt[components_number_ets+components_number_arima+explanatory_checked['xreg_number'], 0]
             else:
-                mat_vt[0, 0:lags_model_max] /= mat_vt[components_number_ets+components_number_arima+xreg_number, 0]
+                mat_vt[0, 0:lags_model_max] /= mat_vt[components_number_ets+components_number_arima+explanatory_checked['xreg_number'], 0]
 
         # If ARIMA is done, debias states
-        if arima_model and initial_arima_estimate:
+        if arima_checked['arima_model'] and initials_checked['initial_arima_estimate']:
             if e_type == "A":
-                mat_vt[components_number_ets+non_zero_ari[:, 1], 0:initial_arima_number] -= \
-                    mat_vt[components_number_ets+components_number_arima+xreg_number, 0]
+                mat_vt[components_number_ets+non_zero_ari[:, 1], 0:initials_checked['initial_arima_number']] -= \
+                    mat_vt[components_number_ets+components_number_arima+explanatory_checked['xreg_number'], 0]
             else:
-                mat_vt[components_number_ets+non_zero_ari[:, 1], 0:initial_arima_number] /= \
-                    mat_vt[components_number_ets+components_number_arima+xreg_number, 0]
+                mat_vt[components_number_ets+non_zero_ari[:, 1], 0:initials_checked['initial_arima_number']] /= \
+                    mat_vt[components_number_ets+components_number_arima+explanatory_checked['xreg_number'], 0]
     else:
        mat_vt[:, 0:lags_model_max] = profiles_recent_table
 
-    return {'mat_vt': mat_vt, 'mat_wt': mat_wt, 'mat_f': mat_f, 'vec_g': vec_g, 'arima_polynomials': arima_polynomials}
-
-
+    return {'mat_vt': mat_vt,
+            'mat_wt': mat_wt,
+            'mat_f': mat_f,
+            'vec_g': vec_g,
+            'arima_polynomials': arima_polynomials}
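Taken together, a hypothetical call of the reworked creator() (using dictionaries like those sketched earlier) would look like this; the five returned matrices are what adam_fitter consumes downstream:

    # Hypothetical usage; argument order follows the new signature above.
    adam_created = creator(
        model_type_dict, lags_dict, profiles_dict, observations_dict,
        persistence_checked, initials_checked, arima_checked, constants_checked,
        phi_dict, components_dict, explanatory_checked,
    )
    print(adam_created['mat_vt'].shape, adam_created['vec_g'].ravel())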
+ """ persistence_estimate_vector = [ - persistence_level_estimate, - model_is_trendy and persistence_trend_estimate, - model_is_seasonal and any(persistence_seasonal_estimate) + persistence_checked['persistence_level_estimate'], + model_type_dict["model_is_trendy"] and persistence_checked['persistence_trend_estimate'], + model_type_dict["model_is_seasonal"] and any(persistence_checked['persistence_seasonal_estimate']) ] + total_params = ( - ets_model * (sum(persistence_estimate_vector) + phi_estimate) + - xreg_model * persistence_xreg_estimate * max(xreg_parameters_persistence or [0]) + - arima_model * (ar_estimate * sum(ar_orders or []) + ma_estimate * sum(ma_orders or [])) + - ets_model * (initial_type not in ["complete", "backcasting"]) * ( - initial_level_estimate + - (model_is_trendy * initial_trend_estimate) + - (model_is_seasonal * sum(initial_seasonal_estimate * (np.array(lags_model_seasonal or []) - 1))) + model_type_dict["ets_model"] * (sum(persistence_estimate_vector) + phi_dict['phi_estimate']) + + explanatory_checked['xreg_model'] * persistence_checked['persistence_xreg_estimate'] * max(explanatory_checked['xreg_parameters_persistence'] or [0]) + + arima_checked['arima_model'] * (arima_checked['ar_estimate'] * sum(arima_checked['ar_orders'] or []) + arima_checked['ma_estimate'] * sum(arima_checked['ma_orders'] or [])) + + model_type_dict["ets_model"] * (initials_checked['initial_type'] not in ["complete", "backcasting"]) * ( + initials_checked['initial_level_estimate'] + + (model_type_dict["model_is_trendy"] * initials_checked['initial_trend_estimate']) + + (model_type_dict["model_is_seasonal"] * sum(initials_checked['initial_seasonal_estimate'] * (np.array(lags_dict["lags_model_seasonal"] or []) - 1))) ) + - (initial_type not in ["complete", "backcasting"]) * arima_model * (initial_arima_number or 0) * initial_arima_estimate + - (initial_type != "complete") * xreg_model * initial_xreg_estimate * sum(xreg_parameters_estimated or []) + - constant_estimate + other_parameter_estimate + (initials_checked['initial_type'] not in ["complete", "backcasting"]) * arima_checked['arima_model'] * (initials_checked['initial_arima_number'] or 0) * initials_checked['initial_arima_estimate'] + + (initials_checked['initial_type'] != "complete") * explanatory_checked['xreg_model'] * initials_checked['initial_xreg_estimate'] * sum(explanatory_checked['xreg_parameters_estimated'] or []) + + constants_checked['constant_estimate'] + #+ initials_checked['other_parameter_estimate'] ) B = np.full(total_params, np.nan) @@ -416,38 +470,38 @@ def initialiser( j = 0 - if ets_model: - if persistence_estimate and any(persistence_estimate_vector): - if any(ptype == "M" for ptype in [e_type, t_type, s_type]): - if ((e_type == "A" and t_type == "A" and s_type == "M") or - (e_type == "A" and t_type == "M" and s_type == "A") or - (initial_type in ["complete", "backcasting"] and - ((e_type == "M" and t_type == "A" and s_type == "A") or - (e_type == "M" and t_type == "A" and s_type == "M")))): - B[j:j+sum(persistence_estimate_vector)] = [0.01, 0] + [0] * components_number_ets_seasonal - elif e_type == "M" and t_type == "M" and s_type == "A": - B[j:j+sum(persistence_estimate_vector)] = [0, 0] + [0] * components_number_ets_seasonal - elif e_type == "M" and t_type == "A": - if initial_type in ["complete", "backcasting"]: - B[j:j+sum(persistence_estimate_vector)] = [0.1, 0] + [0.01] * components_number_ets_seasonal + if model_type_dict["ets_model"]: + if persistence_checked['persistence_estimate'] and 
any(persistence_estimate_vector): + if any(ptype == "M" for ptype in [model_type_dict["error_type"], model_type_dict["trend_type"], model_type_dict["season_type"]]): + if ((model_type_dict["error_type"] == "A" and model_type_dict["trend_type"] == "A" and model_type_dict["season_type"] == "M") or + (model_type_dict["error_type"] == "A" and model_type_dict["trend_type"] == "M" and model_type_dict["season_type"] == "A") or + (initials_checked['initial_type'] in ["complete", "backcasting"] and + ((model_type_dict["error_type"] == "M" and model_type_dict["trend_type"] == "A" and model_type_dict["season_type"] == "A") or + (model_type_dict["error_type"] == "M" and model_type_dict["trend_type"] == "A" and model_type_dict["season_type"] == "M")))): + B[j:j+sum(persistence_estimate_vector)] = [0.01, 0] + [0] * components_dict["components_number_ets_seasonal"] + elif model_type_dict["error_type"] == "M" and model_type_dict["trend_type"] == "M" and model_type_dict["season_type"] == "A": + B[j:j+sum(persistence_estimate_vector)] = [0, 0] + [0] * components_dict["components_number_ets_seasonal"] + elif model_type_dict["error_type"] == "M" and model_type_dict["trend_type"] == "A": + if initials_checked['initial_type'] in ["complete", "backcasting"]: + B[j:j+sum(persistence_estimate_vector)] = [0.1, 0] + [0.01] * components_dict["components_number_ets_seasonal"] else: - B[j:j+sum(persistence_estimate_vector)] = [0.2, 0.01] + [0.01] * components_number_ets_seasonal - elif e_type == "M" and t_type == "M": - B[j:j+sum(persistence_estimate_vector)] = [0.1, 0.05] + [0.01] * components_number_ets_seasonal + B[j:j+sum(persistence_estimate_vector)] = [0.2, 0.01] + [0.01] * components_dict["components_number_ets_seasonal"] + elif model_type_dict["error_type"] == "M" and model_type_dict["trend_type"] == "M": + B[j:j+sum(persistence_estimate_vector)] = [0.1, 0.05] + [0.01] * components_dict["components_number_ets_seasonal"] else: initial_values = [0.1] - if model_is_trendy: + if model_type_dict["model_is_trendy"]: initial_values.append(0.05) - if model_is_seasonal: - initial_values.extend([0.11] * components_number_ets_seasonal) + if model_type_dict["model_is_seasonal"]: + initial_values.extend([0.11] * components_dict["components_number_ets_seasonal"]) B[j:j+sum(persistence_estimate_vector)] = [val for val, estimate in zip(initial_values, persistence_estimate_vector) if estimate] else: initial_values = [0.1] - if model_is_trendy: + if model_type_dict["model_is_trendy"]: initial_values.append(0.05) - if model_is_seasonal: - initial_values.extend([0.11] * components_number_ets_seasonal) + if model_type_dict["model_is_seasonal"]: + initial_values.extend([0.11] * components_dict["components_number_ets_seasonal"]) B[j:j+sum(persistence_estimate_vector)] = [val for val, estimate in zip(initial_values, persistence_estimate_vector) if estimate] @@ -459,153 +513,154 @@ def initialiser( Bu[j:j+sum(persistence_estimate_vector)] = 5 # Names for B - if persistence_level_estimate: + if persistence_checked['persistence_level_estimate']: names.append("alpha") j += 1 - if model_is_trendy and persistence_trend_estimate: + if model_type_dict["model_is_trendy"] and persistence_checked['persistence_trend_estimate']: names.append("beta") j += 1 - if model_is_seasonal and any(persistence_seasonal_estimate): - if components_number_ets_seasonal > 1: - names.extend([f"gamma{i}" for i in range(1, components_number_ets_seasonal+1)]) + if model_type_dict["model_is_seasonal"] and any(persistence_checked['persistence_seasonal_estimate']): + if 
components_dict["components_number_ets_seasonal"] > 1: + names.extend([f"gamma{i}" for i in range(1, components_dict["components_number_ets_seasonal"]+1)]) else: names.append("gamma") - j += sum(persistence_seasonal_estimate) + j += sum(persistence_checked['persistence_seasonal_estimate']) - if xreg_model and persistence_xreg_estimate: - xreg_persistence_number = max(xreg_parameters_persistence) - B[j:j+xreg_persistence_number] = 0.01 if e_type == "A" else 0 + if explanatory_checked['xreg_model'] and persistence_checked['persistence_xreg_estimate']: + xreg_persistence_number = max(explanatory_checked['xreg_parameters_persistence']) + B[j:j+xreg_persistence_number] = 0.01 if model_type_dict["error_type"] == "A" else 0 Bl[j:j+xreg_persistence_number] = -5 Bu[j:j+xreg_persistence_number] = 5 names.extend([f"delta{i+1}" for i in range(xreg_persistence_number)]) j += xreg_persistence_number - if ets_model and phi_estimate: + if model_type_dict["ets_model"] and phi_dict['phi_estimate']: B[j] = 0.95 names.append("phi") Bl[j] = 0 Bu[j] = 1 j += 1 - if arima_model: - if any([ar_estimate, ma_estimate]): - acf_values = [-0.1] * sum(ma_orders * lags) - pacf_values = [0.1] * sum(ar_orders * lags) + if arima_checked['arima_model']: + if any([arima_checked['ar_estimate'], arima_checked['ma_estimate']]): + acf_values = [-0.1] * sum(arima_checked['ma_orders'] * lags_dict["lags"]) + pacf_values = [0.1] * sum(arima_checked['ar_orders'] * lags_dict["lags"]) - if not (ets_model or all(i_orders == 0)): - y_differenced = y_in_sample.copy() + if not (model_type_dict["ets_model"] or all(arima_checked['i_orders'] == 0)): + y_differenced = observations_dict['y_in_sample'].copy() # Implement differencing if needed - if any(i_orders > 0): - for i, order in enumerate(i_orders): + if any(arima_checked['i_orders'] > 0): + for i, order in enumerate(arima_checked['i_orders']): if order > 0: y_differenced = np.diff(y_differenced, n=order, axis=0) # ACF/PACF calculation for non-seasonal models - if all(np.array(lags) <= 1): - if ma_required and ma_estimate: - acf_values[:min(sum(ma_orders * lags), len(y_differenced) - 1)] = calculate_acf(y_differenced, nlags=max(1, sum(ma_orders * lags)))[1:] - if ar_required and ar_estimate: - pacf_values[:min(sum(ar_orders * lags), len(y_differenced) - 1)] = calculate_pacf(y_differenced, nlags=max(1, sum(ar_orders * lags))) + if all(np.array(lags_dict["lags"]) <= 1): + if arima_checked['ma_required'] and arima_checked['ma_estimate']: + acf_values[:min(sum(arima_checked['ma_orders'] * lags_dict["lags"]), len(y_differenced) - 1)] = calculate_acf(y_differenced, nlags=max(1, sum(arima_checked['ma_orders'] * lags_dict["lags"])))[1:] + if arima_checked['ar_required'] and arima_checked['ar_estimate']: + pacf_values[:min(sum(arima_checked['ar_orders'] * lags_dict["lags"]), len(y_differenced) - 1)] = calculate_pacf(y_differenced, nlags=max(1, sum(arima_checked['ar_orders'] * lags_dict["lags"]))) - for i, lag in enumerate(lags): - if ar_required and ar_estimate and ar_orders[i] > 0: - B[j:j+ar_orders[i]] = pacf_values[i*lag:(i+1)*lag][:ar_orders[i]] - if sum(B[j:j+ar_orders[i]]) > 1: - B[j:j+ar_orders[i]] = B[j:j+ar_orders[i]] / sum(B[j:j+ar_orders[i]]) - 0.01 - Bl[j:j+ar_orders[i]] = -5 - Bu[j:j+ar_orders[i]] = 5 - names.extend([f"phi{k+1}[{lag}]" for k in range(ar_orders[i])]) - j += ar_orders[i] + for i, lag in enumerate(lags_dict["lags"]): + if arima_checked['ar_required'] and arima_checked['ar_estimate'] and arima_checked['ar_orders'][i] > 0: + B[j:j+arima_checked['ar_orders'][i]] = 
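The ARIMA branch below seeds AR starting values from the PACF and MA starting values from the ACF of the (differenced) series. calculate_acf/calculate_pacf are not shown in this patch; assuming they behave like their statsmodels equivalents, the idea is roughly:

    import numpy as np
    from statsmodels.tsa.stattools import acf, pacf

    # Sketch only: difference once (as for i_orders = [1]), then read off
    # lag-1 correlations as starting values for phi1 and theta1.
    rng = np.random.default_rng(41)
    y = rng.normal(size=200).cumsum()
    y_diff = np.diff(y)
    phi_start = pacf(y_diff, nlags=1)[1:]   # PACF at lag 1 -> AR(1) start
    theta_start = acf(y_diff, nlags=1)[1:]  # ACF at lag 1 -> MA(1) start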
-    if arima_model:
-        if any([ar_estimate, ma_estimate]):
-            acf_values = [-0.1] * sum(ma_orders * lags)
-            pacf_values = [0.1] * sum(ar_orders * lags)
+    if arima_checked['arima_model']:
+        if any([arima_checked['ar_estimate'], arima_checked['ma_estimate']]):
+            acf_values = [-0.1] * sum(arima_checked['ma_orders'] * lags_dict["lags"])
+            pacf_values = [0.1] * sum(arima_checked['ar_orders'] * lags_dict["lags"])
 
-            if not (ets_model or all(i_orders == 0)):
-                y_differenced = y_in_sample.copy()
+            if not (model_type_dict["ets_model"] or all(arima_checked['i_orders'] == 0)):
+                y_differenced = observations_dict['y_in_sample'].copy()
                 # Implement differencing if needed
-                if any(i_orders > 0):
-                    for i, order in enumerate(i_orders):
+                if any(arima_checked['i_orders'] > 0):
+                    for i, order in enumerate(arima_checked['i_orders']):
                         if order > 0:
                             y_differenced = np.diff(y_differenced, n=order, axis=0)
 
                 # ACF/PACF calculation for non-seasonal models
-                if all(np.array(lags) <= 1):
-                    if ma_required and ma_estimate:
-                        acf_values[:min(sum(ma_orders * lags), len(y_differenced) - 1)] = calculate_acf(y_differenced, nlags=max(1, sum(ma_orders * lags)))[1:]
-                    if ar_required and ar_estimate:
-                        pacf_values[:min(sum(ar_orders * lags), len(y_differenced) - 1)] = calculate_pacf(y_differenced, nlags=max(1, sum(ar_orders * lags)))
+                if all(np.array(lags_dict["lags"]) <= 1):
+                    if arima_checked['ma_required'] and arima_checked['ma_estimate']:
+                        acf_values[:min(sum(arima_checked['ma_orders'] * lags_dict["lags"]), len(y_differenced) - 1)] = calculate_acf(y_differenced, nlags=max(1, sum(arima_checked['ma_orders'] * lags_dict["lags"])))[1:]
+                    if arima_checked['ar_required'] and arima_checked['ar_estimate']:
+                        pacf_values[:min(sum(arima_checked['ar_orders'] * lags_dict["lags"]), len(y_differenced) - 1)] = calculate_pacf(y_differenced, nlags=max(1, sum(arima_checked['ar_orders'] * lags_dict["lags"])))
 
-            for i, lag in enumerate(lags):
-                if ar_required and ar_estimate and ar_orders[i] > 0:
-                    B[j:j+ar_orders[i]] = pacf_values[i*lag:(i+1)*lag][:ar_orders[i]]
-                    if sum(B[j:j+ar_orders[i]]) > 1:
-                        B[j:j+ar_orders[i]] = B[j:j+ar_orders[i]] / sum(B[j:j+ar_orders[i]]) - 0.01
-                    Bl[j:j+ar_orders[i]] = -5
-                    Bu[j:j+ar_orders[i]] = 5
-                    names.extend([f"phi{k+1}[{lag}]" for k in range(ar_orders[i])])
-                    j += ar_orders[i]
+            for i, lag in enumerate(lags_dict["lags"]):
+                if arima_checked['ar_required'] and arima_checked['ar_estimate'] and arima_checked['ar_orders'][i] > 0:
+                    B[j:j+arima_checked['ar_orders'][i]] = pacf_values[i*lag:(i+1)*lag][:arima_checked['ar_orders'][i]]
+                    if sum(B[j:j+arima_checked['ar_orders'][i]]) > 1:
+                        B[j:j+arima_checked['ar_orders'][i]] = B[j:j+arima_checked['ar_orders'][i]] / sum(B[j:j+arima_checked['ar_orders'][i]]) - 0.01
+                    Bl[j:j+arima_checked['ar_orders'][i]] = -5
+                    Bu[j:j+arima_checked['ar_orders'][i]] = 5
+                    names.extend([f"phi{k+1}[{lag}]" for k in range(arima_checked['ar_orders'][i])])
+                    j += arima_checked['ar_orders'][i]
 
-                if ma_required and ma_estimate and ma_orders[i] > 0:
-                    B[j:j+ma_orders[i]] = acf_values[i*lag:(i+1)*lag][:ma_orders[i]]
-                    if sum(B[j:j+ma_orders[i]]) > 1:
-                        B[j:j+ma_orders[i]] = B[j:j+ma_orders[i]] / sum(B[j:j+ma_orders[i]]) - 0.01
-                    Bl[j:j+ma_orders[i]] = -5
-                    Bu[j:j+ma_orders[i]] = 5
-                    names.extend([f"theta{k+1}[{lag}]" for k in range(ma_orders[i])])
-                    j += ma_orders[i]
-
-    if ets_model and initial_type not in ["complete", "backcasting"] and initial_estimate:
-        if initial_level_estimate:
-            B[j] = mat_vt[0, 0]
-            Bl[j] = -np.inf if e_type == "A" else 0
+                if arima_checked['ma_required'] and arima_checked['ma_estimate'] and arima_checked['ma_orders'][i] > 0:
+                    B[j:j+arima_checked['ma_orders'][i]] = acf_values[i*lag:(i+1)*lag][:arima_checked['ma_orders'][i]]
+                    if sum(B[j:j+arima_checked['ma_orders'][i]]) > 1:
+                        B[j:j+arima_checked['ma_orders'][i]] = B[j:j+arima_checked['ma_orders'][i]] / sum(B[j:j+arima_checked['ma_orders'][i]]) - 0.01
+                    Bl[j:j+arima_checked['ma_orders'][i]] = -5
+                    Bu[j:j+arima_checked['ma_orders'][i]] = 5
+                    names.extend([f"theta{k+1}[{lag}]" for k in range(arima_checked['ma_orders'][i])])
+                    j += arima_checked['ma_orders'][i]
+
+    if model_type_dict["ets_model"] and initials_checked['initial_type'] not in ["complete", "backcasting"] and initials_checked['initial_estimate']:
+        if initials_checked['initial_level_estimate']:
+            B[j] = adam_created['mat_vt'][0, 0]
+            Bl[j] = -np.inf if model_type_dict["error_type"] == "A" else 0
             Bu[j] = np.inf
             names.append("level")
             j += 1
-        if model_is_trendy and initial_trend_estimate:
-            B[j] = mat_vt[1, 0]
-            Bl[j] = -np.inf if t_type == "A" else 0
+        if model_type_dict["model_is_trendy"] and initials_checked['initial_trend_estimate']:
+            B[j] = adam_created['mat_vt'][1, 0]
+            Bl[j] = -np.inf if model_type_dict["trend_type"] == "A" else 0
             Bu[j] = np.inf
             names.append("trend")
             j += 1
-        if model_is_seasonal and any(initial_seasonal_estimate):
-            for k in range(components_number_ets_seasonal):
-                if initial_seasonal_estimate[k]:
-                    B[j:j+lags_model[components_number_ets_non_seasonal+k]-1] = mat_vt[components_number_ets_non_seasonal+k, 1:lags_model[components_number_ets_non_seasonal+k]]
-                    if s_type == "A":
-                        Bl[j:j+lags_model[components_number_ets_non_seasonal+k]-1] = -np.inf
-                        Bu[j:j+lags_model[components_number_ets_non_seasonal+k]-1] = np.inf
+        if model_type_dict["model_is_seasonal"] and any(initials_checked['initial_seasonal_estimate']):
+            for k in range(components_dict["components_number_ets_seasonal"]):
+                if initials_checked['initial_seasonal_estimate'][k]:
+                    B[j:j+lags_dict["lags_model_seasonal"][k]-1] = adam_created['mat_vt'][components_dict["components_number_ets"] + k, 1:lags_dict["lags_model_seasonal"][k]]
+                    if model_type_dict["season_type"] == "A":
+                        Bl[j:j+lags_dict["lags_model_seasonal"][k]-1] = -np.inf
+                        Bu[j:j+lags_dict["lags_model_seasonal"][k]-1] = np.inf
                    else:
-                        Bl[j:j+lags_model[components_number_ets_non_seasonal+k]-1] = 0
-                        Bu[j:j+lags_model[components_number_ets_non_seasonal+k]-1] = np.inf
-                    names.extend([f"seasonal{k+1}_{m}" for m in range(2, lags_model[components_number_ets_non_seasonal+k])])
-                    j += lags_model[components_number_ets_non_seasonal+k] - 1
-
-    if initial_type not in ["complete", "backcasting"] and arima_model and initial_arima_estimate:
-        B[j:j+initial_arima_number] = mat_vt[components_number_ets+components_number_arima, :initial_arima_number]
-        names.extend([f"ARIMAState{n}" for n in range(1, initial_arima_number+1)])
-        if e_type == "A":
-            Bl[j:j+initial_arima_number] = -np.inf
-            Bu[j:j+initial_arima_number] = np.inf
+                        Bl[j:j+lags_dict["lags_model_seasonal"][k]-1] = 0
+                        Bu[j:j+lags_dict["lags_model_seasonal"][k]-1] = np.inf
+                    names.extend([f"seasonal{k+1}_{m}" for m in range(2, lags_dict["lags_model_seasonal"][k])])
+                    j += lags_dict["lags_model_seasonal"][k] - 1
+
+    if initials_checked['initial_type'] not in ["complete", "backcasting"] and arima_checked['arima_model'] and initials_checked['initial_arima_estimate']:
+        B[j:j+initials_checked['initial_arima_number']] = adam_created['mat_vt'][components_dict["components_number_ets"] + components_dict["components_number_arima"], :initials_checked['initial_arima_number']]
+        names.extend([f"ARIMAState{n}" for n in range(1, initials_checked['initial_arima_number']+1)])
+        if model_type_dict["error_type"] == "A":
+            Bl[j:j+initials_checked['initial_arima_number']] = -np.inf
+            Bu[j:j+initials_checked['initial_arima_number']] = np.inf
        else:
-            B[j:j+initial_arima_number] = np.abs(B[j:j+initial_arima_number])
-            Bl[j:j+initial_arima_number] = 0
-            Bu[j:j+initial_arima_number] = np.inf
-        j += initial_arima_number
+            B[j:j+initials_checked['initial_arima_number']] = np.abs(B[j:j+initials_checked['initial_arima_number']])
+            Bl[j:j+initials_checked['initial_arima_number']] = 0
+            Bu[j:j+initials_checked['initial_arima_number']] = np.inf
+        j += initials_checked['initial_arima_number']
 
-    if initial_type != "complete" and initial_xreg_estimate and xreg_model:
-        xreg_number_to_estimate = sum(xreg_parameters_estimated or [])
+    if initials_checked['initial_type'] != "complete" and initials_checked['initial_xreg_estimate'] and explanatory_checked['xreg_model']:
+        xreg_number_to_estimate = sum(explanatory_checked['xreg_parameters_estimated'])
        if xreg_number_to_estimate > 0:
-            B[j:j+xreg_number_to_estimate] = mat_vt[components_number_ets+components_number_arima:components_number_ets+components_number_arima+xreg_number, 0]
+            B[j:j+xreg_number_to_estimate] = adam_created['mat_vt'][components_dict["components_number_ets"] + components_dict["components_number_arima"], 0]
            names.extend([f"xreg{idx+1}" for idx in range(xreg_number_to_estimate)])
            Bl[j:j+xreg_number_to_estimate] = -np.inf
            Bu[j:j+xreg_number_to_estimate] = np.inf
            j += xreg_number_to_estimate
 
-    if constant_estimate:
+    if constants_checked['constant_estimate']:
        j += 1
-        if mat_vt.shape[0] > components_number_ets + components_number_arima + xreg_number:
-            B[j-1] = mat_vt[components_number_ets + components_number_arima + xreg_number, 0]
+        if adam_created['mat_vt'].shape[0] > components_dict["components_number_ets"] + components_dict["components_number_arima"] + explanatory_checked['xreg_number']:
+            B[j-1] = adam_created['mat_vt'][components_dict["components_number_ets"] + components_dict["components_number_arima"] + explanatory_checked['xreg_number'], 0]
        else:
            B[j-1] = 0  # or some other default value
-        names.append(constant_name or "constant")
-        if ets_model or (i_orders is not None and sum(i_orders) != 0):
-            if e_type == "A":
-                Bu[j-1] = np.quantile(np.diff(y_in_sample[ot_logical]), 0.6)
+        names.append(constants_checked['constant_name'] or "constant")
+        if model_type_dict["ets_model"] or (arima_checked['i_orders'] is not None and sum(arima_checked['i_orders']) != 0):
+            if model_type_dict["error_type"] == "A":
+                Bu[j-1] = np.quantile(np.diff(observations_dict['y_in_sample'][observations_dict['ot_logical']]), 0.6)
                Bl[j-1] = -Bu[j-1]
            else:
-                Bu[j-1] = np.exp(np.quantile(np.diff(np.log(y_in_sample[ot_logical])), 0.6))
-                Bl[j-1] = np.exp(np.quantile(np.diff(np.log(y_in_sample[ot_logical])), 0.4))
+                Bu[j-1] = np.exp(np.quantile(np.diff(np.log(observations_dict['y_in_sample'][observations_dict['ot_logical']])), 0.6))
+                Bl[j-1] = np.exp(np.quantile(np.diff(np.log(observations_dict['y_in_sample'][observations_dict['ot_logical']])), 0.4))
 
            if Bu[j-1] <= Bl[j-1]:
                Bu[j-1] = np.inf
-                Bl[j-1] = -np.inf if e_type == "A" else 0
+                Bl[j-1] = -np.inf if model_type_dict["error_type"] == "A" else 0
 
            if B[j-1] <= Bl[j-1]:
-                Bl[j-1] = -np.inf if e_type == "A" else 0
+                Bl[j-1] = -np.inf if model_type_dict["error_type"] == "A" else 0
            if B[j-1] >= Bu[j-1]:
                Bu[j-1] = np.inf
        else:
-            Bu[j-1] = max(abs(y_in_sample[ot_logical]), abs(B[j-1]) * 1.01)
+            Bu[j-1] = max(abs(observations_dict['y_in_sample'][observations_dict['ot_logical']]), abs(B[j-1]) * 1.01)
            Bl[j-1] = -Bu[j-1]
 
-    if other_parameter_estimate:
-        j += 1
-        B[j-1] = other
-        names.append("other")
-        Bl[j-1] = 1e-10
-        Bu[j-1] = np.inf
+    # assuming no other parameters for now
+    #if initials_checked['other_parameter_estimate']:
+    #    j += 1
+    #    B[j-1] = other
+    #    names.append("other")
+    #    Bl[j-1] = 1e-10
+    #    Bu[j-1] = np.inf
 
     return {
         "B": B[:j],
@@ -615,168 +670,177 @@ def initialiser(
     }
 
-def filler(B, ets_model, E_type, T_type, S_type, model_is_trendy, model_is_seasonal,
-           components_number_ETS, components_number_ETS_non_seasonal,
-           components_number_ETS_seasonal, components_number_ARIMA,
-           lags, lags_model, lags_model_max,
-           mat_Vt, mat_Wt, mat_F, vec_G,
-           persistence_estimate, persistence_level_estimate, persistence_trend_estimate,
-           persistence_seasonal_estimate, persistence_xreg_estimate,
-           phi_estimate,
-           initial_type, initial_estimate,
-           initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate,
-           initial_arima_estimate, initial_xreg_estimate,
-           arima_model, ar_estimate, ma_estimate, ar_orders, i_orders, ma_orders,
-           ar_required, ma_required, arma_parameters,
-           non_zero_ARI, non_zero_MA, arima_polynomials,
-           xreg_model, xreg_number,
-           xreg_parameters_missing, xreg_parameters_included,
-           xreg_parameters_estimated, xreg_parameters_persistence,
-           constant_estimate):
-
+
+def filler(B,
+           model_type_dict,
+           components_dict,
+           lags_dict,
+           matrices_dict,
+           persistence_checked,
+           initials_checked,
+           arima_checked,
+           explanatory_checked,
+           phi_dict,
+           constants_checked):
+    """
+    Updates model matrices based on parameter values.
+    """
     j = 0
 
     # Fill in persistence
-    if persistence_estimate:
+    if persistence_checked['persistence_estimate']:
         # Persistence of ETS
-        if ets_model:
+        if model_type_dict['ets_model']:
             i = 0
             # alpha
-            if persistence_level_estimate:
+            if persistence_checked['persistence_level_estimate']:
                 j += 1
-                vec_G[i] = B[j-1]
+                matrices_dict['vec_g'][i] = B[j-1]
             # beta
-            if model_is_trendy:
+            if model_type_dict['model_is_trendy']:
                 i = 1
-                if persistence_trend_estimate:
+                if persistence_checked['persistence_trend_estimate']:
                     j += 1
-                    vec_G[i] = B[j-1]
+                    matrices_dict['vec_g'][i] = B[j-1]
             # gamma1, gamma2, ...
-            if model_is_seasonal:
-                if any(persistence_seasonal_estimate):
-                    vec_G[i + np.where(persistence_seasonal_estimate)[0]] = B[j:j+sum(persistence_seasonal_estimate)]
-                    j += sum(persistence_seasonal_estimate)
-                i = components_number_ETS - 1
+            if model_type_dict['model_is_seasonal']:
+                if any(persistence_checked['persistence_seasonal_estimate']):
+                    matrices_dict['vec_g'][i + np.where(persistence_checked['persistence_seasonal_estimate'])[0]] = B[j:j+sum(persistence_checked['persistence_seasonal_estimate'])]
+                    j += sum(persistence_checked['persistence_seasonal_estimate'])
+                i = components_dict['components_number_ets'] - 1
 
         # Persistence of xreg
-        if xreg_model and persistence_xreg_estimate:
-            xreg_persistence_number = max(xreg_parameters_persistence)
-            vec_G[j + components_number_ARIMA:j + components_number_ARIMA + len(xreg_parameters_persistence)] = \
-                B[j:j+xreg_persistence_number][np.array(xreg_parameters_persistence) - 1]
+        if explanatory_checked['xreg_model'] and persistence_checked['persistence_xreg_estimate']:
+            xreg_persistence_number = max(explanatory_checked['xreg_parameters_persistence'])
+            matrices_dict['vec_g'][j + components_dict['components_number_arima']:j + components_dict['components_number_arima'] + len(explanatory_checked['xreg_parameters_persistence'])] = \
+                B[j:j+xreg_persistence_number][np.array(explanatory_checked['xreg_parameters_persistence']) - 1]
             j += xreg_persistence_number
 
     # Damping parameter
-    if ets_model and phi_estimate:
+    if model_type_dict['ets_model'] and phi_dict['phi_estimate']:
         j += 1
-        mat_Wt[:, 1] = B[j-1]
-        mat_F[0:2, 1] = B[j-1]
+        matrices_dict['mat_wt'][:, 1] = B[j-1]
+        matrices_dict['mat_f'][0:2, 1] = B[j-1]
 
     # ARMA parameters
-    if arima_model:
+    if arima_checked['arima_model']:
         # Call the function returning ARI and MA polynomials
         arima_polynomials = adam_polynomialiser(
-            B[j:j+sum(np.array(ar_orders)*ar_estimate + np.array(ma_orders)*ma_estimate)],
-            ar_orders, i_orders, ma_orders,
-            ar_estimate, ma_estimate, arma_parameters, lags
+            B[j:j+sum(np.array(arima_checked['ar_orders'])*arima_checked['ar_estimate'] +
+                      np.array(arima_checked['ma_orders'])*arima_checked['ma_estimate'])],
+            arima_checked['ar_orders'], arima_checked['i_orders'], arima_checked['ma_orders'],
+            arima_checked['ar_estimate'], arima_checked['ma_estimate'],
+            arima_checked['arma_parameters'], lags_dict['lags']
         )
         arima_polynomials = {k: np.array(v) for k, v in arima_polynomials.items()}
 
         # Fill in the transition matrix
-        if non_zero_ARI.shape[0] > 0:
-            mat_F[components_number_ETS + non_zero_ARI[:, 1],
-                  components_number_ETS:components_number_ETS + components_number_ARIMA + constant_estimate] = \
-                -arima_polynomials['ariPolynomial'][non_zero_ARI[:, 0]]
+        if len(arima_checked['non_zero_ari']) > 0:
+            matrices_dict['mat_f'][components_dict['components_number_ets'] + arima_checked['non_zero_ari'][:, 1],
+                                   components_dict['components_number_ets']:components_dict['components_number_ets'] + components_dict['components_number_arima'] + constants_checked['constant_estimate']] = \
+                -arima_polynomials['ariPolynomial'][arima_checked['non_zero_ari'][:, 0]]
 
         # Fill in the persistence vector
-        if non_zero_ARI.shape[0] > 0:
-            vec_G[components_number_ETS + non_zero_ARI[:, 1]] = -arima_polynomials['ariPolynomial'][non_zero_ARI[:, 0]]
-        if non_zero_MA.shape[0] > 0:
-            vec_G[components_number_ETS + non_zero_MA[:, 1]] += arima_polynomials['maPolynomial'][non_zero_MA[:, 0]]
+        if len(arima_checked['non_zero_ari']) > 0:
+            matrices_dict['vec_g'][components_dict['components_number_ets'] + arima_checked['non_zero_ari'][:, 1]] = -arima_polynomials['ariPolynomial'][arima_checked['non_zero_ari'][:, 0]]
+        if len(arima_checked['non_zero_ma']) > 0:
+            matrices_dict['vec_g'][components_dict['components_number_ets'] + arima_checked['non_zero_ma'][:, 1]] += arima_polynomials['maPolynomial'][arima_checked['non_zero_ma'][:, 0]]
 
-        j += sum(np.array(ar_orders)*ar_estimate + np.array(ma_orders)*ma_estimate)
+        j += sum(np.array(arima_checked['ar_orders'])*arima_checked['ar_estimate'] +
+                 np.array(arima_checked['ma_orders'])*arima_checked['ma_estimate'])
 
     # Initials of ETS
-    if ets_model and all(initial_type != ['complete', 'backcasting']) and initial_estimate:
+    if model_type_dict['ets_model'] and initials_checked['initial_type'] not in ['complete', 'backcasting'] and initials_checked['initial_estimate']:
         i = 0
-        if initial_level_estimate:
+        if initials_checked['initial_level_estimate']:
             j += 1
-            mat_Vt[i, :lags_model_max] = B[j-1]
+            matrices_dict['mat_vt'][i, :lags_dict['lags_model_max']] = B[j-1]
             i += 1
-        if model_is_trendy and initial_trend_estimate:
+        if model_type_dict['model_is_trendy'] and initials_checked['initial_trend_estimate']:
             j += 1
-            mat_Vt[i, :lags_model_max] = B[j-1]
+            matrices_dict['mat_vt'][i, :lags_dict['lags_model_max']] = B[j-1]
             i += 1
-        if model_is_seasonal and any(initial_seasonal_estimate):
-            for k in range(components_number_ETS_seasonal):
-                if initial_seasonal_estimate[k]:
-                    mat_Vt[components_number_ETS_non_seasonal + k,
-                           1:lags_model[components_number_ETS_non_seasonal + k] - 1] = \
-                        B[j:j+lags_model[components_number_ETS_non_seasonal + k] - 2]
-                    if S_type == "A":
-                        mat_Vt[components_number_ETS_non_seasonal + k,
-                               lags_model[components_number_ETS_non_seasonal + k] - 1] = \
-                            -np.sum(B[j:j+lags_model[components_number_ETS_non_seasonal + k] - 2])
+        if model_type_dict['model_is_seasonal'] and any(initials_checked['initial_seasonal_estimate']):
+            for k in range(components_dict['components_number_ets_seasonal']):
+                if initials_checked['initial_seasonal_estimate'][k]:
+                    matrices_dict['mat_vt'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k,
+                                            1:lags_dict['lags_model'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k]] = \
+                        B[j:j+lags_dict['lags_model'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k] - 2]
+                    if model_type_dict['season_type'] == "A":
+                        matrices_dict['mat_vt'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k,
+                                                lags_dict['lags_model'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k] - 1] = \
+                            -np.sum(B[j:j+lags_dict['lags_model'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k] - 2])
                     else:  # "M"
-                        mat_Vt[components_number_ETS_non_seasonal + k,
-                               lags_model[components_number_ETS_non_seasonal + k] - 1] = \
-                            1 / np.prod(B[j:j+lags_model[components_number_ETS_non_seasonal + k] - 2])
-                    j += lags_model[components_number_ETS_non_seasonal + k] - 1
+                        matrices_dict['mat_vt'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k,
+                                                lags_dict['lags_model'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k] - 1] = \
+                            1 / np.prod(B[j:j+lags_dict['lags_model'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k] - 2])
+                    j += lags_dict['lags_model'][components_dict['components_number_ets'] - components_dict['components_number_ets_seasonal'] + k] - 1
 
     # Initials of ARIMA
-    if arima_model:
-        if all(initial_type != ['complete', 'backcasting']) and initial_arima_estimate:
-            mat_Vt[components_number_ETS + components_number_ARIMA - 1, :initial_arima_number] = B[j:j+initial_arima_number]
-            if E_type == "A":
-                mat_Vt[components_number_ETS + non_zero_ARI[:, 1], :initial_arima_number] = \
-                    np.dot(arima_polynomials['ariPolynomial'][non_zero_ARI[:, 0]],
-                           B[j:j+initial_arima_number].reshape(1, -1)) / arima_polynomials['ariPolynomial'][-1]
+    if arima_checked['arima_model']:
+        if initials_checked['initial_type'] not in ['complete', 'backcasting'] and initials_checked['initial_arima_estimate']:
+            matrices_dict['mat_vt'][components_dict['components_number_ets'] + components_dict['components_number_arima'] - 1,
+                                    :initials_checked['initial_arima_number']] = B[j:j+initials_checked['initial_arima_number']]
+            if model_type_dict['error_type'] == "A":
+                matrices_dict['mat_vt'][components_dict['components_number_ets'] + arima_checked['non_zero_ari'][:, 1],
+                                        :initials_checked['initial_arima_number']] = \
+                    np.dot(arima_polynomials['ariPolynomial'][arima_checked['non_zero_ari'][:, 0]],
+                           B[j:j+initials_checked['initial_arima_number']].reshape(1, -1)) / arima_polynomials['ariPolynomial'][-1]
            else:  # "M"
-                mat_Vt[components_number_ETS + non_zero_ARI[:, 1], :initial_arima_number] = \
-                    np.exp(np.dot(arima_polynomials['ariPolynomial'][non_zero_ARI[:, 0]],
-                                  np.log(B[j:j+initial_arima_number]).reshape(1, -1)) / arima_polynomials['ariPolynomial'][-1])
-            j += initial_arima_number
-        elif any([ar_estimate, ma_estimate]):
-            if E_type == "A":
-                mat_Vt[components_number_ETS + non_zero_ARI[:, 1], :initial_arima_number] = \
-                    np.dot(arima_polynomials['ariPolynomial'][non_zero_ARI[:, 0]],
-                           mat_Vt[components_number_ETS + components_number_ARIMA - 1, :initial_arima_number].reshape(1, -1)) / \
+                matrices_dict['mat_vt'][components_dict['components_number_ets'] + arima_checked['non_zero_ari'][:, 1],
+                                        :initials_checked['initial_arima_number']] = \
+                    np.exp(np.dot(arima_polynomials['ariPolynomial'][arima_checked['non_zero_ari'][:, 0]],
+                                  np.log(B[j:j+initials_checked['initial_arima_number']]).reshape(1, -1)) / arima_polynomials['ariPolynomial'][-1])
+            j += initials_checked['initial_arima_number']
+        elif any([arima_checked['ar_estimate'], arima_checked['ma_estimate']]):
+            if model_type_dict['error_type'] == "A":
+                matrices_dict['mat_vt'][components_dict['components_number_ets'] + arima_checked['non_zero_ari'][:, 1],
+                                        :initials_checked['initial_arima_number']] = \
+                    np.dot(arima_polynomials['ariPolynomial'][arima_checked['non_zero_ari'][:, 0]],
+                           matrices_dict['mat_vt'][components_dict['components_number_ets'] + components_dict['components_number_arima'] - 1,
+                                                   :initials_checked['initial_arima_number']].reshape(1, -1)) / \
                     arima_polynomials['ariPolynomial'][-1]
            else:  # "M"
-                mat_Vt[components_number_ETS + non_zero_ARI[:, 1], :initial_arima_number] = \
-                    np.exp(np.dot(arima_polynomials['ariPolynomial'][non_zero_ARI[:, 0]],
-                                  np.log(mat_Vt[components_number_ETS + components_number_ARIMA - 1, :initial_arima_number]).reshape(1, -1)) / \
+                matrices_dict['mat_vt'][components_dict['components_number_ets'] + arima_checked['non_zero_ari'][:, 1],
+                                        :initials_checked['initial_arima_number']] = \
+                    np.exp(np.dot(arima_polynomials['ariPolynomial'][arima_checked['non_zero_ari'][:, 0]],
+                                  np.log(matrices_dict['mat_vt'][components_dict['components_number_ets'] + components_dict['components_number_arima'] - 1,
+                                                                 :initials_checked['initial_arima_number']]).reshape(1, -1)) / \
                     arima_polynomials['ariPolynomial'][-1])
 
     # Initials of the xreg
-    if xreg_model and (initial_type != "complete") and initial_estimate and initial_xreg_estimate:
-        xreg_number_to_estimate = sum(xreg_parameters_estimated)
-        mat_Vt[components_number_ETS + components_number_ARIMA + np.where(xreg_parameters_estimated == 1)[0],
-               :lags_model_max] = B[j:j+xreg_number_to_estimate]
+    if explanatory_checked['xreg_model'] and (initials_checked['initial_type'] != "complete") and initials_checked['initial_estimate'] and initials_checked['initial_xreg_estimate']:
+        xreg_number_to_estimate = sum(explanatory_checked['xreg_parameters_estimated'])
+        matrices_dict['mat_vt'][components_dict['components_number_ets'] + components_dict['components_number_arima'] +
+                                np.where(explanatory_checked['xreg_parameters_estimated'] == 1)[0],
+                                :lags_dict['lags_model_max']] = B[j:j+xreg_number_to_estimate]
        j += xreg_number_to_estimate
 
        # Normalise initials
-        for i in np.where(xreg_parameters_missing != 0)[0]:
-            mat_Vt[components_number_ETS + components_number_ARIMA + i, :lags_model_max] = \
-                -np.sum(mat_Vt[components_number_ETS + components_number_ARIMA +
-                               np.where(xreg_parameters_included == xreg_parameters_missing[i])[0],
-                               :lags_model_max])
+        for i in np.where(explanatory_checked['xreg_parameters_missing'] != 0)[0]:
+            matrices_dict['mat_vt'][components_dict['components_number_ets'] + components_dict['components_number_arima'] + i,
+                                    :lags_dict['lags_model_max']] = \
+                -np.sum(matrices_dict['mat_vt'][components_dict['components_number_ets'] + components_dict['components_number_arima'] +
+                                                np.where(explanatory_checked['xreg_parameters_included'] ==
+                                                         explanatory_checked['xreg_parameters_missing'][i])[0],
+                                                :lags_dict['lags_model_max']])
 
     # Constant
-    if constant_estimate:
-        mat_Vt[components_number_ETS + components_number_ARIMA + xreg_number, :] = B[j]
+    if constants_checked['constant_estimate']:
+        matrices_dict['mat_vt'][components_dict['components_number_ets'] + components_dict['components_number_arima'] +
+                                explanatory_checked['xreg_number'], :] = B[j]
 
     return {
-        'mat_Vt': mat_Vt,
-        'mat_Wt': mat_Wt,
-        'mat_F': mat_F,
-        'vec_G': vec_G,
-        'arima_polynomials': arima_polynomials
+        'mat_vt': matrices_dict['mat_vt'],
+        'mat_wt': matrices_dict['mat_wt'],
+        'mat_f': matrices_dict['mat_f'],
+        'vec_g': matrices_dict['vec_g'],
+        'arima_polynomials': matrices_dict['arima_polynomials']
     }
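A sketch of how filler() is presumably wired into the optimisation loop (the actual loss lives in adam_fitter, which this patch does not change):

    # Hypothetical objective: write candidate parameters into the matrices,
    # then score the resulting model; argument order follows the new signature.
    def objective(B):
        filled = filler(
            B, model_type_dict, components_dict, lags_dict, adam_created,
            persistence_checked, initials_checked, arima_checked,
            explanatory_checked, phi_dict, constants_checked,
        )
        # ... pass filled['mat_vt'], filled['mat_f'], filled['vec_g'] to adam_fitter ...
        return 0.0  # placeholder loss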
- orders: ARIMA orders as list [p, d, q] or dict {'ar': p, 'i': d, 'ma': q}. - formula: Formula string for regression component. - constant: Whether to include a constant term. - distribution: Error distribution type. - loss: Loss function to use. - h: Forecast horizon. - holdout: Whether to use holdout for evaluation. - persistence: Persistence parameters. - phi: Damping parameter. - initial: Initial values for model components. - **kwargs: Additional keyword arguments. - - Returns: - A dictionary of processed parameters. - - Raises: - ValueError: If any of the input parameters are invalid. - """ - # Check data - if isinstance(data, pd.DataFrame): - y = data.iloc[:, 0].values - xreg = data.iloc[:, 1:].values if data.shape[1] > 1 else None - elif isinstance(data, pd.Series): - y = data.values - xreg = None - elif isinstance(data, np.ndarray): - if data.ndim == 1: - y = data - xreg = None - elif data.ndim == 2: - y = data[:, 0] - xreg = data[:, 1:] if data.shape[1] > 1 else None - else: - raise ValueError("data must be 1D or 2D array-like") - else: - raise ValueError("data must be pandas DataFrame, Series, or numpy array") - - # Check model - if not isinstance(model, str): - raise ValueError("model must be a string") - - # Check lags - if lags is None: - lags = [1] # Default to 1 if not provided - if not isinstance(lags, list): - raise ValueError("lags must be a list of integers or None") - - # Check orders - if orders is not None: - if isinstance(orders, list): - if len(orders) != 3: - raise ValueError("orders as list must have 3 elements: [p, d, q]") - elif isinstance(orders, dict): - if not all(key in orders for key in ['ar', 'i', 'ma']): - raise ValueError("orders as dict must have keys: 'ar', 'i', 'ma'") - else: - raise ValueError("orders must be a list, dict, or None") - - # Check formula - if formula is not None and not isinstance(formula, str): - raise ValueError("formula must be a string or None") +def architector( + # Model type info + model_type_dict: Dict[str, Any], - # Check distribution - valid_distributions = ["dnorm", "dlaplace", "ds", "dgnorm", "dlnorm", "dgamma", "dinvgauss"] - if distribution not in valid_distributions: - raise ValueError(f"distribution must be one of {valid_distributions}") + # Lags info + lags_dict: Dict[str, Any], - # Check loss - valid_losses = ["likelihood", "MSE", "MAE", "HAM", "LASSO", "RIDGE", "TMSE", "GTMSE", "MSEh", "MSCE"] - if loss not in valid_losses and not callable(loss): - raise ValueError(f"loss must be one of {valid_losses} or a callable function") + # Observation info + observations_dict: Dict[str, Any], + + # Optional model components + arima_checked: Dict[str, Any] = None, + explanatory_checked: Dict[str, Any] = None, + constants_checked: Dict[str, Any] = None, - # Check h and holdout - if not isinstance(h, int) or h <= 0: - raise ValueError("h must be a positive integer") - if not isinstance(holdout, bool): - raise ValueError("holdout must be a boolean") - - # Check persistence - if persistence is not None: - if not isinstance(persistence, (list, dict)): - raise ValueError("persistence must be a list, dict, or None") - - # Check phi - if phi is not None and not isinstance(phi, (int, float)): - raise ValueError("phi must be a number or None") - - # Check initial - valid_initial_str = ["optimal", "backcasting", "complete"] - if initial is not None: - if isinstance(initial, str) and initial not in valid_initial_str: - raise ValueError(f"initial as string must be one of {valid_initial_str}") - elif not isinstance(initial, (str, list, 
 dict)):
-            raise ValueError("initial must be a string, list, dict, or None")
-
-    # Return the processed parameters
-    return {
-        "y": y,
-        "xreg": xreg,
-        "model": model,
-        "lags": lags,
-        "orders": orders,
-        "formula": formula,
-        "constant": constant,
-        "distribution": distribution,
-        "loss": loss,
-        "h": h,
-        "holdout": holdout,
-        "persistence": persistence,
-        "phi": phi,
-        "initial": initial,
-        **kwargs
-    }
-
-def architector(ets_model: bool, E_type: str, T_type: str, S_type: str,
-                lags: List[int], lags_model_seasonal: List[int],
-                xreg_number: int, obs_in_sample: int, initial_type: str,
-                arima_model: bool, lags_model_ARIMA: List[int],
-                xreg_model: bool, constant_required: bool,
-                profiles_recent_table: Union[np.ndarray, None] = None,
-                profiles_recent_provided: bool = False) -> Dict[str, Any]:
+    # Profiles
+    profiles_recent_table: Union[np.ndarray, None] = None,
+    profiles_recent_provided: bool = False
+) -> tuple:
     """
     Constructs the architecture for ADAM models.
 
     Args:
-        ets_model: Whether ETS model is included.
-        E_type, T_type, S_type: ETS model types for error, trend, and seasonality.
-        lags: List of lag values.
-        lags_model_seasonal: List of seasonal lags.
-        xreg_number: Number of external regressors.
-        obs_in_sample: Number of in-sample observations.
-        initial_type: Type of initial values.
-        arima_model: Whether ARIMA model is included.
-        lags_model_ARIMA: List of ARIMA lags.
-        xreg_model: Whether external regressors are included.
-        constant_required: Whether a constant term is required.
-        profiles_recent_table: Pre-computed recent profiles table (optional).
-        profiles_recent_provided: Whether profiles_recent_table is provided.
+        model_type_dict: Dictionary containing model type information (ets_model, error_type, etc.)
+        lags_dict: Dictionary containing lags information
+        observations_dict: Dictionary containing observation information
+        arima_checked: Dictionary containing ARIMA model parameters
+        explanatory_checked: Dictionary containing explanatory variables info
+        constants_checked: Dictionary containing constant term info
+        profiles_recent_table: Pre-computed recent profiles table (optional)
+        profiles_recent_provided: Whether profiles_recent_table is provided
 
     Returns:
-        A dictionary containing the model architecture components.
+        Tuple of (model_type_dict, components_dict, lags_dict, observations_dict, profile_dict)
     """
+    # Extract values from dictionaries
+    ets_model = model_type_dict["ets_model"]
+    E_type = model_type_dict["error_type"]
+    T_type = model_type_dict["trend_type"]
+    S_type = model_type_dict["season_type"]
+
+    lags = lags_dict["lags"]
+    lags_model_seasonal = lags_dict.get("lags_model_seasonal", [])
+
+    # Set defaults for optional parameters
+    arima_model = False if arima_checked is None else arima_checked["arima_model"]
+    lags_model_ARIMA = [] if arima_checked is None else arima_checked.get("lags_model_arima", [])
+
+    xreg_model = False if explanatory_checked is None else explanatory_checked["xreg_model"]
+    xreg_number = 0 if explanatory_checked is None else explanatory_checked.get("xreg_number", 0)
+
+    constant_required = False if constants_checked is None else constants_checked["constant_required"]
+
     components = {}
 
     # If there is ETS
@@ -1044,12 +1000,16 @@ def architector(ets_model: bool, E_type: str, T_type: str, S_type: str,
     else:
         model_is_trendy = model_is_seasonal = False
         components_number_ETS = components_number_ETS_seasonal = 0
-        components_names_ETS = None
-        lags_model_all = lags_model = None
+        components_names_ETS = []
+        lags_model_all = lags_model = []
 
     # If there is ARIMA
+    components_number_ARIMA = 0
+    components_names_ARIMA = []
     if arima_model:
         lags_model_all = lags_model + [[lag] for lag in lags_model_ARIMA]
+        components_number_ARIMA = len(lags_model_ARIMA)
+        components_names_ARIMA = [f"arima{i+1}" for i in range(components_number_ARIMA)]
 
     # If constant is needed, add it
     if constant_required:
@@ -1059,13 +1019,13 @@ def architector(ets_model: bool, E_type: str, T_type: str, S_type: str,
     if xreg_model:
         lags_model_all.extend([[1]] * xreg_number)
 
-    lags_model_max = max(max(lag) for lag in lags_model_all)
+    lags_model_max = max(max(lag) for lag in lags_model_all) if lags_model_all else 1
 
     # Define the number of cols that should be in the matvt
-    obs_states = obs_in_sample + lags_model_max
+    obs_states = observations_dict["obs_in_sample"] + lags_model_max
 
     # Create ADAM profiles for correct treatment of seasonality
-    adam_profiles = adam_profile_creator(lags_model_all, lags_model_max, obs_in_sample + lags_model_max,
+    adam_profiles = adam_profile_creator(lags_model_all, lags_model_max, observations_dict["obs_in_sample"] + lags_model_max,
                                          lags=lags, y_index=None, y_classes=None)
     if profiles_recent_provided:
         profiles_recent_table = profiles_recent_table[:, :lags_model_max]
@@ -1073,18 +1033,42 @@ def architector(ets_model: bool, E_type: str, T_type: str, S_type: str,
         profiles_recent_table = adam_profiles['recent']
     index_lookup_table = adam_profiles['lookup']
 
-    components.update({
-        'model_is_trendy': model_is_trendy,
-        'model_is_seasonal': model_is_seasonal,
-        'components_number_ETS': components_number_ETS,
-        'components_number_ETS_seasonal': components_number_ETS_seasonal,
-        'components_names_ETS': components_names_ETS,
-        'lags_model': lags_model,
-        'lags_model_all': lags_model_all,
-        'lags_model_max': lags_model_max,
-        'obs_states': obs_states,
-        'profiles_recent_table': profiles_recent_table,
-        'index_lookup_table': index_lookup_table
+    # Update model type info
+    model_type_dict.update({
+        "model_is_trendy": model_is_trendy,
+        "model_is_seasonal": model_is_seasonal
     })
 
-    return components
\ No newline at end of file
+    # Create components dict
+    components_dict = {
+        "components_number_ets": components_number_ETS,
+        "components_number_ets_seasonal": components_number_ETS_seasonal,
+        "components_names_ets": components_names_ETS,
+ "components_number_arima": components_number_ARIMA, + "components_names_arima": components_names_ARIMA + } + + # Update lags dict + lags_dict.update({ + "lags_model": lags_model, + "lags_model_all": lags_model_all, + "lags_model_max": lags_model_max + }) + + # Update observations dict with new info + observations_dict["obs_states"] = obs_states + + profile_dict = { + "profiles_recent_table": profiles_recent_table, + 'profiles_recent_provided': profiles_recent_provided, + "index_lookup_table": index_lookup_table + } + + # Return all required information + return ( + model_type_dict, + components_dict, + lags_dict, + observations_dict, + profile_dict + ) \ No newline at end of file diff --git a/python/smooth/adam_general/core/estimator.py b/python/smooth/adam_general/core/estimator.py index b48d24d4..f48c8dd8 100644 --- a/python/smooth/adam_general/core/estimator.py +++ b/python/smooth/adam_general/core/estimator.py @@ -1,267 +1,261 @@ import numpy as np -from adam_profile import architector -from python.smooth.adam_general.core.creator import creator import nlopt -from python.smooth.adam_general.core.utils.ic import ic_function +from core.utils.ic import ic_function import pandas as pd - - - - - -def estimator(ets_model, e_type, t_type, s_type, lags, lags_model_seasonal, lags_model_arima, - obs_states, obs_in_sample, - y_in_sample, persistence, persistence_estimate, - persistence_level, persistence_level_estimate, - persistence_trend, persistence_trend_estimate, - persistence_seasonal, persistence_seasonal_estimate, - persistence_xreg, persistence_xreg_estimate, persistence_xreg_provided, - phi, phi_estimate, - initial_type, initial_level, initial_trend, initial_seasonal, - initial_arima, initial_estimate, - initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate, - initial_arima_estimate, initial_xreg_estimate, initial_xreg_provided, - arima_model, ar_required, i_required, ma_required, arma_parameters, - components_number_arima, components_names_arima, - formula, xreg_model, xreg_model_initials, xreg_data, xreg_number, xreg_names, regressors, - xreg_parameters_missing, xreg_parameters_included, - xreg_parameters_estimated, xreg_parameters_persistence, - constant_required, constant_estimate, constant_value, constant_name, - ot, ot_logical, occurrence_model, p_fitted, - bounds, loss, loss_function, distribution, - horizon, multisteps, other, other_parameter_estimate, lambda_param): +from core.creator import initialiser, creator, architector +from core.utils.cost_functions import CF, log_Lik_ADAM +from smooth.adam_general._adam_general import adam_fitter, adam_forecaster +from core.creator import creator, initialiser, architector, filler +import warnings +from core.utils.utils import scaler + + +def estimator( + general_dict, + model_type_dict, + lags_dict, + observations_dict, + arima_dict, + constant_dict, + explanatory_dict, + profiles_recent_table, + profiles_recent_provided, + persistence_dict, + initials_dict, + phi_dict, + components_dict, + occurrence_dict, + + multisteps = False, + lb = None, + ub = None, + maxtime = None, + print_level = 1, # 1 or 0 + maxeval = None, +): # Create the basic variables - adam_architect = architector( - ets_model, e_type, t_type, s_type, lags, lags_model_seasonal, - xreg_number, obs_in_sample, initial_type, - arima_model, lags_model_arima, xreg_model, constant_required, - profiles_recent_table, profiles_recent_provided - ) + model_type_dict, components_dict, lags_dict, observations_dict, profile_dict = architector( + model_type_dict 
= model_type_dict, + lags_dict = lags_dict, + observations_dict = observations_dict, + arima_checked = arima_dict, + constants_checked = constant_dict, + explanatory_checked = explanatory_dict, + profiles_recent_table = profiles_recent_table, + profiles_recent_provided = profiles_recent_provided +) # Create the matrices for the specific ETS model adam_created = creator( - ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal, - lags, lags_model, lags_model_arima, lags_model_all, lags_model_max, - profiles_recent_table, profiles_recent_provided, - obs_states, obs_in_sample, obs_all, components_number_ets, components_number_ets_seasonal, - components_names_ets, ot_logical, y_in_sample, - persistence, persistence_estimate, - persistence_level, persistence_level_estimate, persistence_trend, persistence_trend_estimate, - persistence_seasonal, persistence_seasonal_estimate, - persistence_xreg, persistence_xreg_estimate, persistence_xreg_provided, - phi, - initial_type, initial_estimate, - initial_level, initial_level_estimate, initial_trend, initial_trend_estimate, - initial_seasonal, initial_seasonal_estimate, - initial_arima, initial_arima_estimate, initial_arima_number, - initial_xreg_estimate, initial_xreg_provided, - arima_model, ar_required, i_required, ma_required, arma_parameters, - ar_orders, i_orders, ma_orders, - components_number_arima, components_names_arima, - xreg_model, xreg_model_initials, xreg_data, xreg_number, xreg_names, - xreg_parameters_persistence, - constant_required, constant_estimate, constant_value, constant_name + model_type_dict = model_type_dict, + lags_dict = lags_dict, + profiles_dict = profile_dict, + observations_dict = observations_dict, + + persistence_checked = persistence_dict, + initials_checked = initials_dict, + arima_checked = arima_dict, + constants_checked = constant_dict, + phi_dict = phi_dict, + components_dict = components_dict, + explanatory_checked = explanatory_dict ) + # Initialize B # Initialize B b_values = initialiser( - ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal, - components_number_ets_non_seasonal, components_number_ets_seasonal, components_number_ets, - lags, lags_model, lags_model_seasonal, lags_model_arima, lags_model_max, - adam_created['mat_vt'], - persistence_estimate, persistence_level_estimate, persistence_trend_estimate, - persistence_seasonal_estimate, persistence_xreg_estimate, - phi_estimate, initial_type, initial_estimate, - initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate, - initial_arima_estimate, initial_xreg_estimate, - arima_model, ar_required, ma_required, ar_estimate, ma_estimate, ar_orders, ma_orders, - components_number_arima, components_names_arima, initial_arima_number, - xreg_model, xreg_number, - xreg_parameters_estimated, xreg_parameters_persistence, - constant_estimate, constant_name, other_parameter_estimate + model_type_dict = model_type_dict, + components_dict = components_dict, + lags_dict = lags_dict, + adam_created = adam_created, + persistence_checked = persistence_dict, + initials_checked = initials_dict, + arima_checked = arima_dict, + constants_checked = constant_dict, + explanatory_checked = explanatory_dict, + observations_dict = observations_dict, + bounds = general_dict['bounds'], + phi_dict = phi_dict, ) - if B is not None: - if isinstance(B, dict): - B = {k: v for k, v in B.items() if k in b_values['B']} - b_values['B'].update(B) - else: - b_values['B'][:] = B - B = dict(zip(b_values['B'].keys(), B)) - # Continue with the rest 
 of the function...
+    # The following block is a direct translation from R; it is not clear yet
+    # whether it is still needed, so it is kept commented out for now:
+    #B = b_values['B']
+    #if B is not None:
+    #    if isinstance(B, dict):
+    #        B = {k: v for k, v in B.items() if k in b_values['B']}
+    #        b_values['B'].update(B)
+    #else:
+    #    b_values['B'][:] = B
+    #    B = dict(zip(b_values['names'], B))
+
+    # Instead, take the vector of initials for the optimisation and its bounds
+    # directly from the initialiser output:
+    #if B is None:
+    B = b_values['B']
+    #if lb is None:
+    lb = b_values['Bl']
+    #if ub is None:
+    ub = b_values['Bu']
+
+    # Original R code, kept for reference:
+    #if(!is.null(B)){
+    #    if(!is.null(names(B))){
+    #        B <- B[names(B) %in% names(BValues$B)];
+    #        BValues$B[] <- B;
+    #    }
+    #    else{
+    #        BValues$B[] <- B;
+    #        names(B) <- names(BValues$B);
+    #    }
+    #}
+
     # Preheat the initial state of ARIMA. Do this only for optimal initials and if B is not provided
-    if arima_model and initial_type == "optimal" and initial_arima_estimate and B is None:
-        adam_created_arima = filler(
-            b_values['B'],
-            ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal,
-            components_number_ets, components_number_ets_non_seasonal,
-            components_number_ets_seasonal, components_number_arima,
-            lags, lags_model, lags_model_max,
-            adam_created['mat_vt'], adam_created['mat_wt'], adam_created['mat_f'], adam_created['vec_g'],
-            persistence_estimate, persistence_level_estimate, persistence_trend_estimate,
-            persistence_seasonal_estimate, persistence_xreg_estimate,
-            phi_estimate,
-            initial_type, initial_estimate,
-            initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate,
-            initial_arima_estimate, initial_xreg_estimate,
-            arima_model, ar_estimate, ma_estimate, ar_orders, i_orders, ma_orders,
-            ar_required, ma_required, arma_parameters,
-            non_zero_ari, non_zero_ma, adam_created['arima_polynomials'],
-            xreg_model, xreg_number,
-            xreg_parameters_missing, xreg_parameters_included,
-            xreg_parameters_estimated, xreg_parameters_persistence, constant_estimate
-        )
+    if model_type_dict['arima_model'] and initials_dict['initial_type'] == "optimal" and initials_dict['initial_arima_estimate'] and B is None:
+        ...  # TODO: port the ARIMA preheating (filler + initial fit + re-initialisation) from R
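# --- Editor's aside, not part of the patch: the hunk below sizes AR/MA companion
# matrices for the stationarity/stability checks. As a standalone illustration of
# the idea, here is a minimal sketch: for an AR(p) polynomial the first row of a
# p x p companion matrix holds the coefficients, an identity block sits below it,
# and the process is stationary when every eigenvalue lies inside the unit circle.
# The names (ar_is_stationary, ar_params) are illustrative, not estimator code.
import numpy as np

def ar_is_stationary(ar_params):
    p = len(ar_params)
    companion = np.zeros((p, p))
    companion[0, :] = ar_params             # AR coefficients in the first row
    if p > 1:
        companion[1:, :-1] = np.eye(p - 1)  # shift block, mirroring ar_polynomial_matrix below
    return bool(np.all(np.abs(np.linalg.eigvals(companion)) < 1))

# Example: an AR(2) with coefficients (0.5, 0.3) is stationary,
# while (1.1, 0.2) is explosive:
# ar_is_stationary([0.5, 0.3])  -> True
# ar_is_stationary([1.1, 0.2])  -> False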
+ - # Write down the initials in the recent profile - profiles_recent_table[:] = adam_created_arima['mat_vt'][:, :lags_model_max] + - # Do initial fit to get the state values from the backcasting - adam_fitted = adam_fitter_wrap( - adam_created_arima['mat_vt'], adam_created_arima['mat_wt'], adam_created_arima['mat_f'], adam_created_arima['vec_g'], - lags_model_all, index_lookup_table, profiles_recent_table, - e_type, t_type, s_type, components_number_ets, components_number_ets_seasonal, - components_number_arima, xreg_number, constant_required, - y_in_sample, ot, True - ) + # Companion matrices for the polynomials calculation -> stationarity / stability checks + if model_type_dict['arima_model']: + # AR polynomials + ar_polynomial_matrix = np.zeros((np.sum(arima_dict['ar_orders']) * lags_dict['lags'], np.sum(arima_dict['ar_orders']) * lags_dict['lags'])) + if ar_polynomial_matrix.shape[0] > 1: + ar_polynomial_matrix[1:, :-1] = np.eye(ar_polynomial_matrix.shape[0] - 1) + # MA polynomials + ma_polynomial_matrix = np.zeros((np.sum(arima_dict['ma_orders']) * lags_dict['lags'], np.sum(arima_dict['ma_orders']) * lags_dict['lags'])) + if ma_polynomial_matrix.shape[0] > 1: + ma_polynomial_matrix[1:, :-1] = np.eye(ma_polynomial_matrix.shape[0] - 1) + else: + ma_polynomial_matrix = ar_polynomial_matrix = None - adam_created['mat_vt'][:, :lags_model_max] = adam_fitted['mat_vt'][:, :lags_model_max] - # Produce new initials - b_values_new = initialiser( - ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal, - components_number_ets_non_seasonal, components_number_ets_seasonal, components_number_ets, - lags, lags_model, lags_model_seasonal, lags_model_arima, lags_model_max, - adam_created['mat_vt'], - persistence_estimate, persistence_level_estimate, persistence_trend_estimate, - persistence_seasonal_estimate, persistence_xreg_estimate, - phi_estimate, initial_type, initial_estimate, - initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate, - initial_arima_estimate, initial_xreg_estimate, - arima_model, ar_required, ma_required, ar_estimate, ma_estimate, ar_orders, ma_orders, - components_number_arima, components_names_arima, initial_arima_number, - xreg_model, xreg_number, - xreg_parameters_estimated, xreg_parameters_persistence, - constant_estimate, constant_name, other_parameter_estimate - ) - B = b_values_new['B'] - # Failsafe, just in case if the initial values contain NA / NaN - B[np.isnan(B)] = b_values['B'][np.isnan(B)] - - - - # Fix for mixed ETS models producing negative values - if (e_type == "M" and any(t in ["A", "Ad"] for t in [t_type, s_type]) or - t_type == "M" and any(t in ["A", "Ad"] for t in [e_type, s_type]) or - s_type == "M" and any(t in ["A", "Ad"] for t in [e_type, t_type])): - if e_type == "M" and ("level" in B) and (B["level"] <= 0): - B["level"] = y_in_sample[0] - if t_type == "M" and ("trend" in B) and (B["trend"] <= 0): - B["trend"] = 1 - seasonal_params = [p for p in B.keys() if p.startswith("seasonal")] - if s_type == "M" and any(B[p] <= 0 for p in seasonal_params): - for p in seasonal_params: - if B[p] <= 0: - B[p] = 1 - - # Create the vector of initials for the optimisation - if B is None: - B = b_values['B'] - if lb is None: - lb = b_values['Bl'] - if ub is None: - ub = b_values['Bu'] - - # Companion matrices for the polynomials calculation -> stationarity / stability checks - if arima_model: - # AR polynomials - ar_polynomial_matrix = np.zeros((np.sum(ar_orders) * lags, np.sum(ar_orders) * lags)) - if ar_polynomial_matrix.shape[0] > 
1: - ar_polynomial_matrix[1:, :-1] = np.eye(ar_polynomial_matrix.shape[0] - 1) - # MA polynomials - ma_polynomial_matrix = np.zeros((np.sum(ma_orders) * lags, np.sum(ma_orders) * lags)) - if ma_polynomial_matrix.shape[0] > 1: - ma_polynomial_matrix[1:, :-1] = np.eye(ma_polynomial_matrix.shape[0] - 1) - else: - ma_polynomial_matrix = ar_polynomial_matrix = None - - # If the distribution is default, change it according to the error term - if distribution == "default": - if loss == "likelihood": - distribution_new = "dnorm" if e_type == "A" else "dgamma" - elif loss in ["MAEh", "MACE", "MAE"]: - distribution_new = "dlaplace" - elif loss in ["HAMh", "CHAM", "HAM"]: - distribution_new = "ds" - else: - distribution_new = "dnorm" + # If the distribution is default, change it according to the error term + if general_dict['distribution'] == "default": + if general_dict['loss'] == "likelihood": + general_dict['distribution_new'] = "dnorm" if model_type_dict['error_type'] == "A" else "dgamma" + elif general_dict['loss'] in ["MAEh", "MACE", "MAE"]: + general_dict['distribution_new'] = "dlaplace" + elif general_dict['loss'] in ["HAMh", "CHAM", "HAM"]: + general_dict['distribution_new'] = "ds" else: - distribution_new = distribution - - - # Parameters are chosen to speed up the optimisation process and have decent accuracy - #opts = { - # 'algorithm': algorithm, - # 'xtol_rel': xtol_rel, - # 'xtol_abs': xtol_abs, - # 'ftol_rel': ftol_rel, - # 'ftol_abs': ftol_abs, - # 'maxeval': maxeval_used, - # 'maxtime': maxtime, - # 'print_level': print_level - #} - - # Create nlopt optimizer object - opt = nlopt.opt(nlopt.LD_SLSQP, len(B)) # Use SLSQP algorithm to match R code - - # Set bounds - opt.set_lower_bounds(lb) - opt.set_upper_bounds(ub) - - # Set stopping criteria - opt.set_ftol_rel(ftol_rel) - opt.set_xtol_rel(xtol_rel) - opt.set_xtol_abs(xtol_abs) - opt.set_ftol_abs(ftol_abs) - opt.set_maxeval(maxeval_used) - if maxtime is not None: - opt.set_maxtime(maxtime) - - # Define objective function wrapper since nlopt expects different signature - def objective_wrapper(x, grad): - return CF(x, - ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal, y_in_sample, - ot, ot_logical, occurrence_model, obs_in_sample, - components_number_ets, components_number_ets_seasonal, components_number_ets_non_seasonal, - components_number_arima, - lags, lags_model, lags_model_all, lags_model_max, - index_lookup_table, profiles_recent_table, - adam_created['mat_vt'], adam_created['mat_wt'], adam_created['mat_f'], adam_created['vec_g'], - persistence_estimate, persistence_level_estimate, persistence_trend_estimate, - persistence_seasonal_estimate, persistence_xreg_estimate, - phi_estimate, initial_type, initial_estimate, initial_level_estimate, - initial_trend_estimate, initial_seasonal_estimate, - initial_arima_estimate, initial_xreg_estimate, - arima_model, non_zero_ari, non_zero_ma, adam_created['arima_polynomials'], - ar_estimate, ma_estimate, - ar_orders, i_orders, ma_orders, - ar_required, ma_required, arma_parameters, - xreg_model, xreg_number, - xreg_parameters_missing, xreg_parameters_included, - xreg_parameters_estimated, xreg_parameters_persistence, - constant_required, constant_estimate, - bounds, loss, loss_function, distribution_new, - horizon, multisteps, - denominator, y_denominator, - other, other_parameter_estimate, lambda_, - ar_polynomial_matrix, ma_polynomial_matrix) - - # Set objective function - opt.set_min_objective(objective_wrapper) + general_dict['distribution_new'] = "dnorm" + else: + 
general_dict['distribution_new'] = general_dict['distribution'] + + # Print initial parameters if print_level is 41 + print_level_hidden = print_level + if print_level == 1: + print("Initial parameters:", B) + print_level = 0 + + # Set maxeval based on parameters + maxeval_used = maxeval + if maxeval is None: + maxeval_used = len(B) * 40 + # If xreg model, do more iterations + if explanatory_dict['xreg_model']: + maxeval_used = len(B) * 100 + maxeval_used = max(1000, maxeval_used) + + # Handle LASSO/RIDGE denominator calculation + if general_dict['loss'] in ["LASSO", "RIDGE"]: + if explanatory_dict['xreg_number'] > 0: + # Calculate standard deviation for each column of matWt + general_dict['denominator'] = np.std(adam_created['mat_wt'], axis=0) + # Replace infinite values with 1 + general_dict['denominator'][np.isinf(general_dict['denominator'])] = 1 + else: + general_dict['denominator'] = None + # Calculate denominator for y values + general_dict['y_denominator'] = max(np.std(np.diff(observations_dict['y_in_sample'])), 1) + else: + general_dict['denominator'] = None + general_dict['y_denominator'] = None + + general_dict['multisteps'] = multisteps + + + + # Parameters are chosen to speed up the optimisation process and have decent accuracy + #opts = { + # 'algorithm': algorithm, + # 'xtol_rel': xtol_rel, + # 'xtol_abs': xtol_abs, + # 'ftol_rel': ftol_rel, + # 'ftol_abs': ftol_abs, + # 'maxeval': maxeval_used, + # 'maxtime': maxtime, + # 'print_level': print_level + #} + + # Create nlopt optimizer object + opt = nlopt.opt(nlopt.LD_SLSQP, len(B)) # Use SLSQP algorithm to match R code + + # Set bounds + opt.set_lower_bounds(lb) + opt.set_upper_bounds(ub) + opt.set_xtol_rel(1e-6) # Relative tolerance on optimization parameters + opt.set_ftol_rel(1e-6) # Relative tolerance on function value + + if maxtime is not None: + opt.set_maxtime(maxtime) + + # Define objective function wrapper since nlopt expects different signature + def objective_wrapper(x, grad): + return CF(x, + model_type_dict, + components_dict, + lags_dict, + adam_created, + persistence_dict, + initials_dict, + arima_dict, + explanatory_dict, + phi_dict, + constant_dict, + observations_dict, + profile_dict, + general_dict, + bounds = "usual") + + # Set objective function + opt.set_min_objective(objective_wrapper) + + try: + # Run optimization + x = opt.optimize(B) + res_fun = opt.last_optimum_value() + res = type('OptimizeResult', (), { + 'x': x, + 'fun': res_fun, + 'success': True + }) + except Exception as e: + print(f"Optimization failed: {str(e)}") + res = type('OptimizeResult', (), { + 'x': B, + 'fun': 1e+300, + 'success': False + }) + + # If optimization failed, try again with modified initial values + if np.isinf(res.fun) or res.fun == 1e+300: + # Reset initial values + if model_type_dict['ets_model']: + B[:components_dict['components_number_ets']] = 0 + if model_type_dict['arima_model']: + start_idx = components_dict['components_number_ets'] + persistence_dict['persistence_xreg_estimate'] * explanatory_dict['xreg_number'] + end_idx = start_idx + sum(np.array(arima_dict['ar_orders']) * arima_dict['ar_estimate'] + np.array(arima_dict['ma_orders']) * arima_dict['ma_estimate']) + B[start_idx:end_idx] = 0.01 try: - # Run optimization + # Try optimization again x = opt.optimize(B) res_fun = opt.last_optimum_value() res = type('OptimizeResult', (), { @@ -270,149 +264,119 @@ def objective_wrapper(x, grad): 'success': True }) except Exception as e: - print(f"Optimization failed: {str(e)}") + print(f"Second optimization attempt 
failed: {str(e)}") res = type('OptimizeResult', (), { 'x': B, 'fun': 1e+300, 'success': False }) - # If optimization failed, try again with modified initial values - if np.isinf(res.fun) or res.fun == 1e+300: - # Reset initial values - if ets_model: - B[:components_number_ets] = 0 - if arima_model: - start_idx = components_number_ets + persistence_xreg_estimate * xreg_number - end_idx = start_idx + sum(np.array(ar_orders) * ar_estimate + np.array(ma_orders) * ma_estimate) - B[start_idx:end_idx] = 0.01 - - try: - # Try optimization again - x = opt.optimize(B) - res_fun = opt.last_optimum_value() - res = type('OptimizeResult', (), { - 'x': x, - 'fun': res_fun, - 'success': True - }) - except Exception as e: - print(f"Second optimization attempt failed: {str(e)}") - res = type('OptimizeResult', (), { - 'x': B, - 'fun': 1e+300, - 'success': False - }) - - if print_level_hidden > 0: - print(res) - - # Check the obtained parameters and the loss value and remove redundant parameters - # Cases to consider: - # 1. Some smoothing parameters are zero or one; - # 2. The cost function value is -Inf (due to no variability in the sample); - - # Prepare the values to return - B[:] = res.x - CF_value = res.fun - # A fix for the special case of LASSO/RIDGE with lambda==1 - if any(loss == loss_type for loss_type in ["LASSO", "RIDGE"]) and lambda_ == 1: - CF_value = 0 - n_param_estimated = len(B) + if print_level_hidden > 0: + print(res) + + # Check the obtained parameters and the loss value and remove redundant parameters + # Cases to consider: + # 1. Some smoothing parameters are zero or one; + # 2. The cost function value is -Inf (due to no variability in the sample); + + # Prepare the values to return + B[:] = res.x + CF_value = res.fun + # A fix for the special case of LASSO/RIDGE with lambda==1 + if any(general_dict['loss'] == loss_type for loss_type in ["LASSO", "RIDGE"]) and general_dict['lambda_'] == 1: + CF_value = 0 + n_param_estimated = len(B) + + + # Return a proper logLik class equivalent + log_lik_adam_value = log_Lik_ADAM( + B, + model_type_dict, + components_dict, + lags_dict, + adam_created, + persistence_dict, + initials_dict, + arima_dict, + explanatory_dict, + phi_dict, + constant_dict, + observations_dict, + occurrence_dict, + general_dict, + profile_dict, + multisteps = False + ) - - # Return a proper logLik class equivalent - logLikADAMValue = logLikReturn( - B, - ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal, y_in_sample, - ot, ot_logical, occurrence_model, p_fitted, obs_in_sample, - components_number_ets, components_number_ets_seasonal, components_number_ets_non_seasonal, - components_number_arima, - lags, lags_model, lags_model_all, lags_model_max, - index_lookup_table, profiles_recent_table, - adam_created['mat_vt'], adam_created['mat_wt'], adam_created['mat_f'], adam_created['vec_g'], - persistence_estimate, persistence_level_estimate, persistence_trend_estimate, - persistence_seasonal_estimate, persistence_xreg_estimate, - phi_estimate, initial_type, initial_estimate, - initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate, - initial_arima_estimate, initial_xreg_estimate, - arima_model, non_zero_ari, non_zero_ma, ar_estimate, ma_estimate, - adam_created['arima_polynomials'], - ar_orders, i_orders, ma_orders, ar_required, ma_required, arma_parameters, - xreg_model, xreg_number, - xreg_parameters_missing, xreg_parameters_included, - xreg_parameters_estimated, xreg_parameters_persistence, - constant_required, constant_estimate, - bounds, 
loss, loss_function, distribution_new, horizon, multisteps, - denominator, y_denominator, other, other_parameter_estimate, lambda_, - ar_polynomial_matrix, ma_polynomial_matrix - ) + # In case of likelihood, we typically have one more parameter to estimate - scale. + log_lik_adam_value = { + 'value': log_lik_adam_value, + 'nobs': observations_dict['obs_in_sample'], + 'df': n_param_estimated + (1 if general_dict['loss'] == "likelihood" else 0) + } - # In case of likelihood, we typically have one more parameter to estimate - scale. - logLikADAMValue = { - 'value': logLikADAMValue, - 'nobs': obs_in_sample, - 'df': n_param_estimated + (1 if loss == "likelihood" else 0) - } + # Here I will add regressors when I have olm + # line 3032 - 3322 - # Here I will add regressors when I have olm - # line 3032 - 3322 - - return { - 'B': B, - 'CF_value': CF_value, - 'n_param_estimated': n_param_estimated, - 'logLikADAMValue': logLikADAMValue, - 'xreg_model': xreg_model, - 'xreg_data': xreg_data, - 'xreg_number': xreg_number, - 'xreg_names': xreg_names, - 'xreg_model_initials': xreg_model_initials, - 'formula': formula, - 'initial_xreg_estimate': initial_xreg_estimate, - 'persistence_xreg_estimate': persistence_xreg_estimate, - 'xreg_parameters_missing': xreg_parameters_missing, - 'xreg_parameters_included': xreg_parameters_included, - 'xreg_parameters_estimated': xreg_parameters_estimated, - 'xreg_parameters_persistence': xreg_parameters_persistence, - 'arima_polynomials': adam_created['arima_polynomials'] - } + return { + 'B': B, + 'CF_value': CF_value, + 'n_param_estimated': n_param_estimated, + 'log_lik_adam_value': log_lik_adam_value, + + # skiping the regressions for now + # 'xreg_model': xreg_model, + # 'xreg_data': xreg_data, + # 'xreg_number': xreg_number, + # 'xreg_names': xreg_names, + # 'xreg_model_initials': xreg_model_initials, + # 'formula': formula, + # 'initial_xreg_estimate': initial_xreg_estimate, + # 'persistence_xreg_estimate': persistence_xreg_estimate, + # 'xreg_parameters_missing': xreg_parameters_missing, + # 'xreg_parameters_included': xreg_parameters_included, + # 'xreg_parameters_estimated': xreg_parameters_estimated, + # 'xreg_parameters_persistence': xreg_parameters_persistence, + 'arima_polynomials': adam_created['arima_polynomials'] + } - -def selector(model, models_pool, allow_multiplicative, - ets_model, e_type, t_type, s_type, damped, lags, - lags_model_seasonal, lags_model_arima, - obs_states, obs_in_sample, - y_in_sample, persistence, persistence_estimate, - persistence_level, persistence_level_estimate, - persistence_trend, persistence_trend_estimate, - persistence_seasonal, persistence_seasonal_estimate, - persistence_xreg, persistence_xreg_estimate, persistence_xreg_provided, - phi, phi_estimate, - initial_type, initial_level, initial_trend, initial_seasonal, - initial_arima, initial_estimate, - initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate, - initial_arima_estimate, initial_xreg_estimate, initial_xreg_provided, - arima_model, ar_required, i_required, ma_required, arma_parameters, - components_number_arima, components_names_arima, - xreg_model, xreg_model_initials, xreg_data, xreg_number, xreg_names, regressors, - xreg_parameters_missing, xreg_parameters_included, - xreg_parameters_estimated, xreg_parameters_persistence, - constant_required, constant_estimate, constant_value, constant_name, - ot, ot_logical, occurrence_model, p_fitted, ic_function, - bounds, loss, loss_function, distribution, - horizon, multisteps, other, 
other_parameter_estimate, lambda_): +import math + +def selector( + model_type_dict, + phi_dict, + general_dict, + lags_dict, + observations_dict, + arima_dict, + constant_dict, + explanatory_dict, + occurrence_dict, + components_dict, + profiles_recent_table, + profiles_recent_provided, + persistence_results, + initials_results, + + criterion = "AICc", + silent = False +): """Creates a pool of models and selects the best of them""" + + # Note: + # If we call the selector we need custom dictionairies to pass each time! + # I need to find a way to pass it every time + # Check if the pool was provided. In case of "no", form the big and the small ones - if models_pool is None: + if model_type_dict['models_pool'] is None: # The variable saying that the pool was not provided. if not silent: print("Forming the pool of models based on... ", end="") # Define the whole pool of errors - if not allow_multiplicative: + if not model_type_dict['allow_multiplicative']: pool_errors = ["A"] pool_trends = ["N", "A", "Ad"] pool_seasonals = ["N", "A"] @@ -423,43 +387,43 @@ def selector(model, models_pool, allow_multiplicative, # Some preparation variables # If e_type is not Z, then check on additive errors - if e_type != "Z": - pool_errors = pool_errors_small = e_type + if model_type_dict['error_type'] != "Z": + pool_errors = pool_errors_small = model_type_dict['error_type'] else: pool_errors_small = "A" # If t_type is not Z, then create a pool with specified type - if t_type != "Z": - if t_type == "X": + if model_type_dict['trend_type'] != "Z": + if model_type_dict['trend_type'] == "X": pool_trends_small = ["N", "A"] pool_trends = ["N", "A", "Ad"] check_trend = True - elif t_type == "Y": + elif model_type_dict['trend_type'] == "Y": pool_trends_small = ["N", "M"] pool_trends = ["N", "M", "Md"] check_trend = True else: - if damped: - pool_trends = pool_trends_small = [t_type + "d"] + if model_type_dict['damped']: + pool_trends = pool_trends_small = [model_type_dict['trend_type'] + "d"] else: - pool_trends = pool_trends_small = [t_type] + pool_trends = pool_trends_small = [model_type_dict['trend_type']] check_trend = False else: pool_trends_small = ["N", "A"] check_trend = True # If s_type is not Z, then create specific pools - if s_type != "Z": - if s_type == "X": + if model_type_dict['season_type'] != "Z": + if model_type_dict['season_type'] == "X": pool_seasonals = pool_seasonals_small = ["N", "A"] check_seasonal = True - elif s_type == "Y": + elif model_type_dict['season_type'] == "Y": pool_seasonals_small = ["N", "M"] pool_seasonals = ["N", "M"] check_seasonal = True else: - pool_seasonals_small = [s_type] - pool_seasonals = [s_type] + pool_seasonals_small = [model_type_dict['season_type']] + pool_seasonals = [model_type_dict['season_type']] check_seasonal = False else: pool_seasonals_small = ["N", "A", "M"] @@ -475,7 +439,7 @@ def selector(model, models_pool, allow_multiplicative, # Align error and seasonality, if the error was not forced to be additive # The new pool: "ANN" "ANA" "MNM" "AAN" "AAA" "MAM" - if any(model[2] == "M" for model in pool_small) and e_type not in ["A", "X"]: + if any(model[2] == "M" for model in pool_small) and model_type_dict['error_type'] not in ["A", "X"]: for i, model in enumerate(pool_small): if model[2] == "M": pool_small[i] = "M" + model[1:] @@ -493,57 +457,61 @@ def selector(model, models_pool, allow_multiplicative, # Branch and bound is here while check: + + # here just update the values on the dictionaries + # I think its going to temporary work + i += 1 model_current = 
pool_small[j-1]
-        e_type = model_current[0]
-        t_type = model_current[1]
+        # Create copies of model_type_dict and phi_dict, so that the originals
+        # are not modified while the pool is explored
+        model_type_dict_temp = model_type_dict.copy()
+        model_type_dict_temp['model'] = model_current
+        phi_dict_temp = phi_dict.copy()
+
+        # Replace the values in the temporary dictionaries
+        model_type_dict_temp['error_type'] = model_current[0]
+        model_type_dict_temp['trend_type'] = model_current[1]
         if len(model_current) == 4:
-            phi = 0.95
-            phi_estimate = True
-            s_type = model_current[3]
+            phi_dict_temp['phi'] = 0.95
+            phi_dict_temp['phi_estimate'] = True
+            model_type_dict_temp['season_type'] = model_current[3]
         else:
-            phi = 1
-            phi_estimate = False
-            s_type = model_current[2]
-
-        results[i-1] = estimator(
-            ets_model, e_type, t_type, s_type, lags, lags_model_seasonal, lags_model_arima,
-            obs_states, obs_in_sample,
-            y_in_sample, persistence, persistence_estimate,
-            persistence_level, persistence_level_estimate,
-            persistence_trend, persistence_trend_estimate,
-            persistence_seasonal, persistence_seasonal_estimate,
-            persistence_xreg, persistence_xreg_estimate, persistence_xreg_provided,
-            phi, phi_estimate,
-            initial_type, initial_level, initial_trend, initial_seasonal,
-            initial_arima, initial_estimate,
-            initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate,
-            initial_arima_estimate, initial_xreg_estimate, initial_xreg_provided,
-            arima_model, ar_required, i_required, ma_required, arma_parameters,
-            components_number_arima, components_names_arima,
-            formula, xreg_model, xreg_model_initials, xreg_data, xreg_number, xreg_names, regressors,
-            xreg_parameters_missing, xreg_parameters_included,
-            xreg_parameters_estimated, xreg_parameters_persistence,
-            constant_required, constant_estimate, constant_value, constant_name,
-            ot, ot_logical, occurrence_model, p_fitted,
-            bounds, loss, loss_function, distribution,
-            horizon, multisteps, other, other_parameter_estimate, lambda_param
-        )
-
-        results[i-1]["IC"] = ic_function(results[i-1]["logLikADAMValue"])
-        results[i-1]["Etype"] = e_type
-        results[i-1]["Ttype"] = t_type
-        results[i-1]["Stype"] = s_type
-        results[i-1]["phiEstimate"] = phi_estimate
-
-        if phi_estimate:
-            results[i-1]["phi"] = results[i-1]["B"].get("phi")
+            phi_dict_temp['phi'] = 1
+            phi_dict_temp['phi_estimate'] = False
+            model_type_dict_temp['season_type'] = model_current[2]
+
+        results[i-1] = {}
+        results[i-1]['adam_estimated'] = estimator(
+            general_dict=general_dict,
+            model_type_dict=model_type_dict_temp,
+            lags_dict=lags_dict,
+            observations_dict=observations_dict,
+            arima_dict=arima_dict,
+            constant_dict=constant_dict,
+            explanatory_dict=explanatory_dict,
+            profiles_recent_table=profiles_recent_table,
+            profiles_recent_provided=profiles_recent_provided,
+            persistence_dict=persistence_results,
+            initials_dict=initials_results,
+            occurrence_dict=occurrence_dict,
+            phi_dict=phi_dict_temp,
+            components_dict=components_dict,
+        )
+
+        # TODO: this needs further iteration on how the outputs are returned
+        results[i-1]["IC"] = ic_function(general_dict['ic'], loglik=results[i-1]['adam_estimated']["log_lik_adam_value"])
+        results[i-1]['model_type_dict'] = model_type_dict_temp
+        results[i-1]['phi_dict'] = phi_dict_temp
+        results[i-1]['model'] = model_current
+
+        if phi_dict_temp['phi_estimate']:
+            results[i-1]['phi_dict']["phi"] = results[i-1]['adam_estimated']["B"].get("phi")
         else:
-            results[i-1]["phi"] = 1
+            results[i-1]['phi_dict']["phi"] = 1
 
-        results[i-1]["model"] = model_current
-
         if models_tested is None:
             models_tested = [model_current]
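# --- Editor's aside, not part of the patch: each candidate in the pool above is
# scored by ic_function. A minimal sketch of the AICc computation such a function
# is expected to perform, with purely illustrative names and inputs; the actual
# implementation lives in core.utils.ic.
import math

def aicc(loglik, nobs, df):
    # AIC = -2 * logLik + 2 * k; AICc adds the small-sample correction term
    aic = -2.0 * loglik + 2.0 * df
    if nobs - df - 1 <= 0:
        return math.inf  # correction undefined: treat the model as infinitely bad
    return aic + (2.0 * df * (df + 1)) / (nobs - df - 1)

# Example: with equal likelihoods the model with fewer parameters wins:
# aicc(-100.0, 24, 3) -> ~207.2, smaller than aicc(-100.0, 24, 5) -> ~213.3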
@@ -555,19 +523,19 @@ def selector(model, models_pool, allow_multiplicative, if results[best_i-1]["IC"] <= results[i-1]["IC"]: # If Ttype is the same, then we check seasonality if model_current[1] == pool_small[best_j-1][1]: - pool_seasonals = results[best_i-1]["Stype"] + pool_seasonals = results[best_i-1]["model_type_dict"]["season_type"] check_seasonal = False j = [k+1 for k in range(len(pool_small)) if pool_small[k] != pool_small[best_j-1] and pool_small[k][-1] == pool_seasonals] # Otherwise we checked trend else: - pool_trends = results[best_j-1]["Ttype"] + pool_trends = results[best_j-1]["model_type_dict"]["trend_type"] check_trend = False else: # If the trend is the same if model_current[1] == pool_small[best_i-1][1]: - pool_seasonals = [s for s in pool_seasonals if s != results[best_i-1]["Stype"]] + pool_seasonals = [s for s in pool_seasonals if s != model_type_dict_temp['season_type']] if len(pool_seasonals) > 1: # Select another seasonal model, not from previous iteration and not current best_j = j @@ -582,7 +550,7 @@ def selector(model, models_pool, allow_multiplicative, pool_small[k][1] != model_current[1]] check_seasonal = False else: - pool_trends = [t for t in pool_trends if t != results[best_j-1]["Ttype"]] + pool_trends = [t for t in pool_trends if t != model_type_dict_temp['trend_type']] best_i = i best_j = j check_trend = False @@ -628,51 +596,46 @@ def selector(model, models_pool, allow_multiplicative, model_current = models_pool[j-1] # print(model_current) - e_type = model_current[0] - t_type = model_current[1] + model_type_dict_temp['error_type'] = model_current[0] + model_type_dict_temp['trend_type'] = model_current[1] if len(model_current) == 4: - phi = 0.95 - s_type = model_current[3] - phi_estimate = True + phi_dict_temp['phi'] = 0.95 + model_type_dict_temp['season_type'] = model_current[3] + phi_dict_temp['phi_estimate'] = True else: - phi = 1 - s_type = model_current[2] - phi_estimate = False - - - results[j-1] = estimator( - ets_model, e_type, t_type, s_type, lags, lags_model_seasonal, lags_model_arima, - obs_states, obs_in_sample, - y_in_sample, persistence, persistence_estimate, - persistence_level, persistence_level_estimate, - persistence_trend, persistence_trend_estimate, - persistence_seasonal, persistence_seasonal_estimate, - persistence_xreg, persistence_xreg_estimate, persistence_xreg_provided, - phi, phi_estimate, - initial_type, initial_level, initial_trend, initial_seasonal, - initial_arima, initial_estimate, - initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate, - initial_arima_estimate, initial_xreg_estimate, initial_xreg_provided, - arima_model, ar_required, i_required, ma_required, arma_parameters, - components_number_arima, components_names_arima, - formula, xreg_model, xreg_model_initials, xreg_data, xreg_number, xreg_names, regressors, - xreg_parameters_missing, xreg_parameters_included, - xreg_parameters_estimated, xreg_parameters_persistence, - constant_required, constant_estimate, constant_value, constant_name, - ot, ot_logical, occurrence_model, p_fitted, - bounds, loss, loss_function, distribution, - horizon, multisteps, other, other_parameter_estimate, lambda_) - - results[j-1]["IC"] = ic_function(results[j-1]["logLikADAMValue"]) - results[j-1]["Etype"] = e_type - results[j-1]["Ttype"] = t_type - results[j-1]["Stype"] = s_type - results[j-1]["phiEstimate"] = phi_estimate - if phi_estimate: - results[j-1]["phi"] = results[j-1]["B"][next(i for i,v in enumerate(results[j-1]["B"].keys()) if v=="phi")] + 
phi_dict_temp['phi'] = 1
+            model_type_dict_temp['season_type'] = model_current[2]
+            phi_dict_temp['phi_estimate'] = False
+
+
+        results[j-1] = {}
+        results[j-1]['adam_estimated'] = estimator(
+            general_dict=general_dict,
+            model_type_dict=model_type_dict_temp,
+            lags_dict=lags_dict,
+            observations_dict=observations_dict,
+            arima_dict=arima_dict,
+            constant_dict=constant_dict,
+            explanatory_dict=explanatory_dict,
+            profiles_recent_table=profiles_recent_table,
+            profiles_recent_provided=profiles_recent_provided,
+            persistence_dict=persistence_results,
+            initials_dict=initials_results,
+            occurrence_dict=occurrence_dict,
+            phi_dict=phi_dict_temp,
+            components_dict=components_dict,
+        )
+
+        # TODO: this needs further iteration on how the outputs are returned
+        results[j-1]["IC"] = ic_function(general_dict['ic'], loglik=results[j-1]['adam_estimated']["log_lik_adam_value"])
+        results[j-1]['model_type_dict'] = model_type_dict_temp
+        results[j-1]['phi_dict'] = phi_dict_temp
+        results[j-1]['model'] = model_current
+
+        if phi_dict_temp['phi_estimate']:
+            results[j-1]['phi_dict']["phi"] = results[j-1]['adam_estimated']["B"].get("phi")
         else:
-            results[j-1]["phi"] = 1
-        results[j-1]["model"] = model_current
+            results[j-1]['phi_dict']["phi"] = 1
 
 
         if not silent:
@@ -689,131 +652,153 @@ def selector(model, models_pool, allow_multiplicative,
         # Replace NaN values with large number
         ic_selection = [1e100 if math.isnan(x) else x for x in ic_selection]
 
-    return {"results": results, "icSelection": ic_selection_dict}
-
-
-def preparator(B, ets_model, e_type, t_type, s_type,
-               lags_model, lags_model_max, lags_model_all,
-               components_number_ets, components_number_ets_seasonal,
-               xreg_number, distribution, loss,
-               persistence_estimate, persistence_level_estimate, persistence_trend_estimate,
-               persistence_seasonal_estimate, persistence_xreg_estimate,
-               phi_estimate, other_parameter_estimate,
-               initial_type, initial_estimate,
-               initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate,
-               initial_arima_estimate, initial_xreg_estimate,
-               mat_vt, mat_wt, mat_f, vec_g,
-               occurrence_model, ot, oes_model,
-               parameters_number, cf_value,
-               arima_model, ar_required, ma_required,
-               ar_estimate, ma_estimate, ar_orders, i_orders, ma_orders,
-               non_zero_ari, non_zero_ma,
-               arima_polynomials, arma_parameters,
-               constant_required, constant_estimate):
-    """Function prepares all the matrices and vectors for return"""
+    return {"results": results, "ic_selection": ic_selection_dict}
+
+def preparator(
+    # Model type info
+    model_type_dict,
+
+    # Components info
+    components_dict,
+
+    # Lags info
+    lags_dict,
+
+    # Matrices from creator
+    matrices_dict,
+
+    # Parameter dictionaries
+    persistence_checked,
+    initials_checked,
+    arima_checked,
+    explanatory_checked,
+    phi_dict,
+    constants_checked,
+
+    # Other parameters
+    observations_dict,
+    occurrence_dict,
+    general_dict,
+    profiles_dict,
+
+    # The parameter vector
+    adam_estimated,
+
+    # Optional parameters
+    bounds="usual",
+    other=None
+):
+    """Function prepares all the matrices and vectors for return"""
 
-    if model_do != "use":
-        # Fill in the matrices
-        adam_elements = filler(
-            B,
-            ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal,
-            components_number_ets, components_number_ets_non_seasonal,
-            components_number_ets_seasonal, components_number_arima,
-            lags, lags_model, lags_model_max,
-            mat_vt, mat_wt, mat_f, vec_g,
-            persistence_estimate, persistence_level_estimate, persistence_trend_estimate,
-            persistence_seasonal_estimate, persistence_xreg_estimate,
-            phi_estimate,
-            initial_type, initial_estimate,
-            initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate,
-            initial_arima_estimate, initial_xreg_estimate,
-            arima_model, ar_estimate, ma_estimate, ar_orders, i_orders, ma_orders,
-            ar_required, ma_required, arma_parameters,
-            non_zero_ari, non_zero_ma, arima_polynomials,
-            xreg_model, xreg_number,
-            xreg_parameters_missing, xreg_parameters_included,
-            xreg_parameters_estimated, xreg_parameters_persistence, constant_estimate
+    # Fill in the matrices if needed
+    if general_dict.get("model_do") != "use":
+        matrices_dict = filler(
+            adam_estimated['B'],
+            model_type_dict = model_type_dict,
+            components_dict = components_dict,
+            lags_dict = lags_dict,
+            matrices_dict = matrices_dict,
+            persistence_checked = persistence_checked,
+            initials_checked = initials_checked,
+            arima_checked = arima_checked,
+            explanatory_checked = explanatory_checked,
+            phi_dict = phi_dict,
+            constants_checked = constants_checked
        )
 
-        # Write down phi
-        if phi_estimate:
-            phi[:] = B[next(i for i,v in enumerate(B.keys()) if v=="phi")]
+    # Write down phi
+    if phi_dict["phi_estimate"]:
+        phi_dict["phi"] = adam_estimated['B'][next(i for i, v in enumerate(adam_estimated['B'].keys()) if v == "phi")]
 
     # Write down the initials in the recent profile
-    profiles_recent_table[:] = mat_vt[:, :lags_model_max]
-    profiles_recent_initial = mat_vt[:, :lags_model_max].copy()
+    profiles_dict["profiles_recent_table"][:] = matrices_dict['mat_vt'][:, :lags_dict["lags_model_max"]]
+    profiles_dict["profiles_recent_initial"] = matrices_dict['mat_vt'][:, :lags_dict["lags_model_max"]].copy()
 
     # Fit the model to the data
-    adam_fitted = adam_fitter_wrap(
-        mat_vt, mat_wt, mat_f, vec_g,
-        lags_model_all, index_lookup_table, profiles_recent_table,
-        e_type, t_type, s_type, components_number_ets, components_number_ets_seasonal,
-        components_number_arima, xreg_number, constant_required,
-        y_in_sample, ot, any(x in initial_type for x in ["complete", "backcasting"])
+    adam_fitted = adam_fitter(
+        matrices_dict['mat_vt'], matrices_dict['mat_wt'], matrices_dict['mat_f'], matrices_dict['vec_g'],
+        lags_dict['lags_model_all'], profiles_dict['index_lookup_table'], profiles_dict['profiles_recent_table'],
+        model_type_dict["error_type"], model_type_dict["trend_type"], model_type_dict["season_type"],
+        components_dict["components_number_ets"], components_dict["components_number_ets_seasonal"],
+        components_dict.get("components_number_arima", 0), explanatory_checked["xreg_number"],
+        constants_checked["constant_required"],
+        observations_dict["y_in_sample"], observations_dict["ot"],
+        any(x in initials_checked["initial_type"] for x in ["complete", "backcasting"])
    )
 
-    mat_vt[:] = adam_fitted["mat_vt"]
-
-    # Write down the recent profile for future use
-    profiles_recent_table = adam_fitted["profile"]
-
+    matrices_dict['mat_vt'][:] = adam_fitted["matVt"]
+    profiles_dict["profiles_recent_table"] = adam_fitted["profile"]
 
     # Make sure that there are no negative values in multiplicative components
     # This might appear in case of bounds="a"
-    if t_type == "M" and (np.any(np.isnan(mat_vt[1,:])) or np.any(mat_vt[1,:] <= 0)):
-        i = np.where(mat_vt[1,:] <= 0)[0]
-        mat_vt[1,i] = 1e-6
-        profiles_recent_table[1,i] = 1e-6
-
-    if s_type == "M" and np.all(~np.isnan(mat_vt[components_number_ets_non_seasonal:components_number_ets_non_seasonal+components_number_ets_seasonal,:])) and \
-       np.any(mat_vt[components_number_ets_non_seasonal:components_number_ets_non_seasonal+components_number_ets_seasonal,:] <= 0):
-        i = 
np.where(mat_vt[components_number_ets_non_seasonal:components_number_ets_non_seasonal+components_number_ets_seasonal,:] <= 0)[0] - mat_vt[components_number_ets_non_seasonal:components_number_ets_non_seasonal+components_number_ets_seasonal,i] = 1e-6 - i = np.where(profiles_recent_table[components_number_ets_non_seasonal:components_number_ets_non_seasonal+components_number_ets_seasonal,:] <= 0)[0] - profiles_recent_table[components_number_ets_non_seasonal:components_number_ets_non_seasonal+components_number_ets_seasonal,i] = 1e-6 + if model_type_dict["trend_type"] == "M" and (np.any(np.isnan(matrices_dict['mat_vt'][1,:])) or np.any(matrices_dict['mat_vt'][1,:] <= 0)): + i = np.where(matrices_dict['mat_vt'][1,:] <= 0)[0] + matrices_dict['mat_vt'][1,i] = 1e-6 + profiles_dict["profiles_recent_table"][1,i] = 1e-6 + + if model_type_dict["season_type"] == "M" and np.all(~np.isnan(matrices_dict['mat_vt'][components_dict["components_number_ets_non_seasonal"]:components_dict["components_number_ets_non_seasonal"]+components_dict["components_number_ets_seasonal"],:])) and \ + np.any(matrices_dict['mat_vt'][components_dict["components_number_ets_non_seasonal"]:components_dict["components_number_ets_non_seasonal"]+components_dict["components_number_ets_seasonal"],:] <= 0): + i = np.where(matrices_dict['mat_vt'][components_dict["components_number_ets_non_seasonal"]:components_dict["components_number_ets_non_seasonal"]+components_dict["components_number_ets_seasonal"],:] <= 0)[0] + matrices_dict['mat_vt'][components_dict["components_number_ets_non_seasonal"]:components_dict["components_number_ets_non_seasonal"]+components_dict["components_number_ets_seasonal"],i] = 1e-6 + i = np.where(profiles_dict["profiles_recent_table"][components_dict["components_number_ets_non_seasonal"]:components_dict["components_number_ets_non_seasonal"]+components_dict["components_number_ets_seasonal"],:] <= 0)[0] + profiles_dict["profiles_recent_table"][components_dict["components_number_ets_non_seasonal"]:components_dict["components_number_ets_non_seasonal"]+components_dict["components_number_ets_seasonal"],i] = 1e-6 # Prepare fitted and error with ts / zoo - if any(y_classes == "ts"): - y_fitted = pd.Series(np.full(obs_in_sample, np.nan), index=pd.date_range(start=y_start, periods=obs_in_sample, freq=y_frequency)) - errors = pd.Series(np.full(obs_in_sample, np.nan), index=pd.date_range(start=y_start, periods=obs_in_sample, freq=y_frequency)) + if not isinstance(observations_dict["y_in_sample"], pd.Series): + y_fitted = pd.Series(np.full(observations_dict["obs_in_sample"], np.nan), + index=pd.date_range(start=observations_dict["y_start"], + periods=observations_dict["obs_in_sample"], + freq=observations_dict["frequency"])) + errors = pd.Series(np.full(observations_dict["obs_in_sample"], np.nan), + index=pd.date_range(start=observations_dict["y_start"], + periods=observations_dict["obs_in_sample"], + freq=observations_dict["frequency"])) else: - y_fitted = pd.Series(np.full(obs_in_sample, np.nan), index=y_in_sample_index) - errors = pd.Series(np.full(obs_in_sample, np.nan), index=y_in_sample_index) + y_fitted = pd.Series(np.full(observations_dict["obs_in_sample"], np.nan), index=observations_dict["y_in_sample_index"]) + errors = pd.Series(np.full(observations_dict["obs_in_sample"], np.nan), index=observations_dict["y_in_sample_index"]) + + errors[:] = adam_fitted["errors"].flatten() + y_fitted[:] = adam_fitted["yFitted"].flatten() - errors[:] = adam_fitted["errors"] - y_fitted[:] = adam_fitted["y_fitted"] # Check what was 
returned in the end if np.any(np.isnan(y_fitted)) or np.any(pd.isna(y_fitted)): warnings.warn("Something went wrong in the estimation of the model and NaNs were produced. " "If this is a mixed model, consider using the pure ones instead.") - if occurrence_model: - y_fitted[:] = y_fitted * p_fitted + if occurrence_dict["occurrence_model"]: + y_fitted[:] = y_fitted * occurrence_dict["p_fitted"] # Fix the cases, when we have zeroes in the provided occurrence - if occurrence == "provided": - y_fitted[~ot_logical] = y_fitted[~ot_logical] * p_fitted[~ot_logical] + if occurrence_dict["occurrence"] == "provided": + y_fitted[~occurrence_dict["ot_logical"]] = y_fitted[~occurrence_dict["ot_logical"]] * occurrence_dict["p_fitted"][~occurrence_dict["ot_logical"]] # Produce forecasts if the horizon is non-zero - if horizon > 0: - if any(y_classes == "ts"): - y_forecast = pd.Series(np.full(horizon, np.nan), - index=pd.date_range(start=y_forecast_start, periods=horizon, freq=y_frequency)) + if general_dict["h"] > 0: + if not isinstance(observations_dict.get("y_in_sample"), pd.Series): + y_forecast = pd.Series(np.full(general_dict["h"], np.nan), + index=pd.date_range(start=observations_dict["y_forecast_start"], + periods=general_dict["h"], + freq=observations_dict["frequency"])) else: - y_forecast = pd.Series(np.full(horizon, np.nan), index=y_forecast_index) - - y_forecast[:] = adam_forecaster_wrap( - mat_wt[-horizon:], mat_f, - lags_model_all, - index_lookup_table[:, lags_model_max + obs_in_sample + np.arange(horizon)], - profiles_recent_table, - e_type, t_type, s_type, - components_number_ets, components_number_ets_seasonal, - components_number_arima, xreg_number, constant_required, - horizon - ) + y_forecast = pd.Series(np.full(general_dict["h"], np.nan), + index=observations_dict["y_forecast_index"]) + y_forecast[:] = adam_forecaster( + matrixWt=matrices_dict['mat_wt'][-general_dict["h"]:], + matrixF=matrices_dict['mat_f'], + lags=lags_dict["lags_model_all"], + indexLookupTable=profiles_dict["index_lookup_table"], + profilesRecent=profiles_dict["profiles_recent_table"], + E=model_type_dict["error_type"], + T=model_type_dict["trend_type"], + S=model_type_dict["season_type"], + nNonSeasonal=components_dict["components_number_ets"], + nSeasonal=components_dict["components_number_ets_seasonal"], + nArima=components_dict.get("components_number_arima", 0), + nXreg=explanatory_checked["xreg_number"], + constant=constants_checked["constant_required"], + horizon=general_dict["h"] + ).flatten() # Make safety checks # If there are NaN values @@ -821,62 +806,73 @@ def preparator(B, ets_model, e_type, t_type, s_type, y_forecast[np.isnan(y_forecast)] = 0 # Amend forecasts, multiplying by probability - if occurrence_model and not occurrence_model_provided: - y_forecast[:] = y_forecast * np.array(forecast(oes_model, h=h).mean) - elif (occurrence_model and occurrence_model_provided) or occurrence == "provided": - y_forecast[:] = y_forecast * p_forecast + + # skiping for now we dont have the occurence yet + # if occurrence_dict["occurrence_model"] and not occurrence_dict["occurrence_model_provided"]: + # y_forecast[:] = y_forecast * np.array(forecast(occurrence_dict["oes_model"], h=general_dict["horizon"]).mean) + # elif (occurrence_dict["occurrence_model"] and occurrence_dict["occurrence_model_provided"]) or occurrence_dict["occurrence"] == "provided": + # y_forecast[:] = y_forecast * occurrence_dict["p_forecast"] else: - if any(y_classes == "ts"): - y_forecast = pd.Series([np.nan], 
         # Make safety checks
         # If there are NaN values
@@ -821,62 +806,73 @@ def preparator(B, ets_model, e_type, t_type, s_type,
         y_forecast[np.isnan(y_forecast)] = 0

         # Amend forecasts, multiplying by probability
-        if occurrence_model and not occurrence_model_provided:
-            y_forecast[:] = y_forecast * np.array(forecast(oes_model, h=h).mean)
-        elif (occurrence_model and occurrence_model_provided) or occurrence == "provided":
-            y_forecast[:] = y_forecast * p_forecast
+
+        # skipping for now; we don't have the occurrence model ported yet
+        # if occurrence_dict["occurrence_model"] and not occurrence_dict["occurrence_model_provided"]:
+        #     y_forecast[:] = y_forecast * np.array(forecast(occurrence_dict["oes_model"], h=general_dict["horizon"]).mean)
+        # elif (occurrence_dict["occurrence_model"] and occurrence_dict["occurrence_model_provided"]) or occurrence_dict["occurrence"] == "provided":
+        #     y_forecast[:] = y_forecast * occurrence_dict["p_forecast"]
     else:
-        if any(y_classes == "ts"):
-            y_forecast = pd.Series([np.nan], index=pd.date_range(start=y_forecast_start, periods=1, freq=y_frequency))
+        if not isinstance(observations_dict.get("y_in_sample"), pd.Series):
+            y_forecast = pd.Series([np.nan],
+                                   index=pd.date_range(start=observations_dict["y_forecast_start"],
+                                                       periods=1,
+                                                       freq=observations_dict["frequency"]))
         else:
-            y_forecast = pd.Series(np.full(horizon, np.nan), index=y_forecast_index)
+            y_forecast = pd.Series(np.full(general_dict["h"], np.nan),
+                                   index=observations_dict["y_forecast_index"])

     # If the distribution is default, change it according to the error term
-    if distribution == "default":
-        if loss == "likelihood":
-            if e_type == "A":
-                distribution = "dnorm"
-            elif e_type == "M":
-                distribution = "dgamma"
-        elif loss in ["MAEh", "MACE", "MAE"]:
-            distribution = "dlaplace"
-        elif loss in ["HAMh", "CHAM", "HAM"]:
-            distribution = "ds"
-        elif loss in ["MSEh", "MSCE", "MSE", "GPL"]:
-            distribution = "dnorm"
+    if general_dict["distribution"] == "default":
+        if general_dict["loss"] == "likelihood":
+            if model_type_dict["error_type"] == "A":
+                general_dict["distribution"] = "dnorm"
+            elif model_type_dict["error_type"] == "M":
+                general_dict["distribution"] = "dgamma"
+        elif general_dict["loss"] in ["MAEh", "MACE", "MAE"]:
+            general_dict["distribution"] = "dlaplace"
+        elif general_dict["loss"] in ["HAMh", "CHAM", "HAM"]:
+            general_dict["distribution"] = "ds"
+        elif general_dict["loss"] in ["MSEh", "MSCE", "MSE", "GPL"]:
+            general_dict["distribution"] = "dnorm"
         else:
-            distribution = "dnorm"
+            general_dict["distribution"] = "dnorm"

     # Initial values to return
-    initial_value = [None] * (ets_model * (1 + model_is_trendy + model_is_seasonal) + arima_model + xreg_model)
-    initial_value_ets = [None] * (ets_model * len(lags_model))
-    initial_value_names = [""] * (ets_model * (1 + model_is_trendy + model_is_seasonal) + arima_model + xreg_model)
-    # The vector that defines what was estimated in the model
-    initial_estimated = [False] * (ets_model * (1 + model_is_trendy + model_is_seasonal * components_number_ets_seasonal) +
-                                   arima_model + xreg_model)
+    initial_value = [None] * (model_type_dict["ets_model"] * (1 + model_type_dict["model_is_trendy"] + model_type_dict["model_is_seasonal"]) +
+                              arima_checked["arima_model"] + explanatory_checked["xreg_model"])
+    initial_value_ets = [None] * (model_type_dict["ets_model"] * len(lags_dict["lags_model"]))
+    initial_value_names = [""] * (model_type_dict["ets_model"] * (1 + model_type_dict["model_is_trendy"] + model_type_dict["model_is_seasonal"]) +
+                                  arima_checked["arima_model"] + explanatory_checked["xreg_model"])
+    # The vector that defines what was estimated in the model
+    initial_estimated = [False] * (model_type_dict["ets_model"] * (1 + model_type_dict["model_is_trendy"] + model_type_dict["model_is_seasonal"] * components_dict["components_number_ets_seasonal"]) +
+                                   arima_checked["arima_model"] + explanatory_checked["xreg_model"])

     # Write down the initials of ETS
     j = 0
-    if ets_model:
+    if model_type_dict["ets_model"]:
         # Write down level, trend and seasonal
-        for i in range(len(lags_model)):
+        for i in range(len(lags_dict["lags_model"])):
             # In case of level / trend, we want to get the very first value
-            if lags_model[i] == 1:
-                initial_value_ets[i] = mat_vt[i, :lags_model_max][0]
+            if lags_dict["lags_model"][i] == 1:
+                initial_value_ets[i] = matrices_dict['mat_vt'][i, :lags_dict["lags_model_max"]][0]
             # In cases of seasonal components, they should be at the end of the pre-heat period
             else:
-                initial_value_ets[i] = mat_vt[i, :lags_model_max][-lags_model[i]:]
+                #print(lags_dict["lags_model"][i][0]) # here we might have an issue for taking the first element of the list
+                start_idx = lags_dict["lags_model_max"] - lags_dict["lags_model"][i][0]
+                initial_value_ets[i] = matrices_dict['mat_vt'][i, start_idx:lags_dict["lags_model_max"]]
        j = 0
        # Write down level in the final list
-        initial_estimated[j] = initial_level_estimate
+        initial_estimated[j] = initials_checked["initial_level_estimate"]
        initial_value[j] = initial_value_ets[j]
        initial_value_names[j] = "level"

-        if model_is_trendy:
+        if model_type_dict["model_is_trendy"]:
            j = 1
-            initial_estimated[j] = initial_trend_estimate
+            initial_estimated[j] = initials_checked["initial_trend_estimate"]
            # Write down trend in the final list
            initial_value[j] = initial_value_ets[j]
            # Remove the trend from ETS list
@@ -884,15 +880,15 @@ def preparator(B, ets_model, e_type, t_type, s_type,
            initial_value_names[j] = "trend"

        # Write down the initial seasonals
-        if model_is_seasonal:
-            initial_estimated[j + 1:j + 1 + components_number_ets_seasonal] = initial_seasonal_estimate
+        if model_type_dict["model_is_seasonal"]:
+            initial_estimated[j + 1:j + 1 + components_dict["components_number_ets_seasonal"]] = initials_checked["initial_seasonal_estimate"]
            # Remove the level from ETS list
            initial_value_ets[0] = None
            j += 1
-            if len(initial_seasonal_estimate) > 1:
+            if len(initials_checked["initial_seasonal_estimate"]) > 1:
                initial_value[j] = [x for x in initial_value_ets if x is not None]
                initial_value_names[j] = "seasonal"
-                for k in range(components_number_ets_seasonal):
+                for k in range(components_dict["components_number_ets_seasonal"]):
                    initial_estimated[j + k] = f"seasonal{k+1}"
            else:
                initial_value[j] = next(x for x in initial_value_ets if x is not None)
@@ -900,151 +896,159 @@ def preparator(B, ets_model, e_type, t_type, s_type,
                initial_estimated[j] = "seasonal"

    # Write down the ARIMA initials
-    if arima_model:
+    if arima_checked["arima_model"]:
        j += 1
-        initial_estimated[j] = initial_arima_estimate
-        if initial_arima_estimate:
-            initial_value[j] = mat_vt[components_number_ets + components_number_arima - 1, :initial_arima_number]
+        initial_estimated[j] = initials_checked["initial_arima_estimate"]
+        if initials_checked["initial_arima_estimate"]:
+            initial_value[j] = matrices_dict['mat_vt'][components_dict["components_number_ets"] + components_dict.get("components_number_arima", 0) - 1, :initials_checked["initial_arima_number"]]
        else:
-            initial_value[j] = initial_arima
+            initial_value[j] = initials_checked["initial_arima"]
        initial_value_names[j] = "arima"
        initial_estimated[j] = "arima"
-
    # Set names for initial values
    initial_value = {name: value for name, value in zip(initial_value_names, initial_value)}

    # Get persistence values
-    persistence = np.array(vec_g).flatten()
-    persistence = {name: value for name, value in zip(vec_g.index, persistence)}
+    persistence = np.array(matrices_dict['vec_g']).flatten()
+
+    # I have no names for the matrix
+    #persistence = {name: value for name, value in zip(matrices_dict['vec_g'].index, persistence)}

    # Remove xreg persistence if needed
-    if xreg_model and regressors != "adapt":
-        regressors = "use"
-    elif not xreg_model:
-        regressors = None
+    if explanatory_checked["xreg_model"] and explanatory_checked.get("regressors") != "adapt":
+        explanatory_checked["regressors"] = "use"
+    elif not explanatory_checked["xreg_model"]:
+        explanatory_checked["regressors"] = None

    # Handle ARMA parameters
-    if arima_model:
+    if arima_checked["arima_model"]:
        arma_parameters_list = {}
        j = 0
-        if ar_required and phi_estimate:
+        if arima_checked["ar_required"] and arima_checked["ar_estimate"]:
            # Avoid damping parameter phi by checking name length > 3
            arma_parameters_list["ar"] = [b for name, b in B.items() if len(name) > 3 and name.startswith("phi")]
            j += 1
-        elif ar_required and not phi_estimate:
+        elif arima_checked["ar_required"] and not arima_checked["ar_estimate"]:
            # Avoid damping parameter phi
-            arma_parameters_list["ar"] = [p for name, p in arma_parameters.items() if name.startswith("phi")]
+            arma_parameters_list["ar"] = [p for name, p in arima_checked["arma_parameters"].items() if name.startswith("phi")]
            j += 1
-        if ma_required and ma_estimate:
+        if arima_checked["ma_required"] and arima_checked["ma_estimate"]:
            arma_parameters_list["ma"] = [b for name, b in B.items() if name.startswith("theta")]
-        elif ma_required and not ma_estimate:
-            arma_parameters_list["ma"] = [p for name, p in arma_parameters.items() if name.startswith("theta")]
+        elif arima_checked["ma_required"] and not arima_checked["ma_estimate"]:
+            arma_parameters_list["ma"] = [p for name, p in arima_checked["arma_parameters"].items() if name.startswith("theta")]
    else:
        arma_parameters_list = None

-    # Handle distribution parameters
-    if distribution in ["dalaplace", "dgnorm", "dlgnorm", "dt"] and other_parameter_estimate:
-        other = abs(B[-1])
+    # for now I am skipping this one
+    if general_dict["distribution_new"] in ["dalaplace", "dgnorm", "dlgnorm", "dt"] and initials_checked["other_parameter_estimate"]:
+        other = abs(adam_estimated['B'][-1])

    # Calculate scale parameter using scaler function
-    # which() equivalent is just boolean indexing in numpy
-    scale = scaler(distribution, Etype, errors[ot_logical], y_fitted[ot_logical], obs_in_sample, other)
+    scale = scaler(general_dict["distribution_new"],
+                   model_type_dict["error_type"],
+                   errors[observations_dict["ot_logical"]],
+                   y_fitted[observations_dict["ot_logical"]],
+                   observations_dict["obs_in_sample"],
+                   other)

    # Record constant if estimated
-    if constant_estimate:
-        constant_value = B[constant_name]
-
+    if constants_checked["constant_estimate"]:
+        constant_value = adam_estimated['B'][constants_checked["constant_name"]]
+    else:
+        constant_value = adam_estimated['B'][-1]
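[review note] The removed lines above had no else branch for the constant: the R code only reads the constant out of B when it is actually estimated. When the constant is provided rather than estimated, B[-1] is not guaranteed to hold it, so something along these lines may be safer (the "constant_value" key on the checker dict is an assumption, not something this patch defines):

    if constants_checked["constant_estimate"]:
        constant_value = adam_estimated['B'][constants_checked["constant_name"]]
    else:
        constant_value = constants_checked.get("constant_value")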
other_returned["ARIMA_indices"] = {"nonZeroARI": non_zero_ari, "nonZeroMA": non_zero_ma} - other_returned["ar_polynomial_matrix"] = np.zeros((ar_orders @ lags, ar_orders @ lags)) + if arima_checked["arima_model"]: + other_returned["polynomial"] = adam_estimated['arima_polynomials'] + other_returned["ARIMA_indices"] = { + "nonZeroARI": arima_checked["non_zero_ari"], + "nonZeroMA": arima_checked["non_zero_ma"] + } + other_returned["ar_polynomial_matrix"] = np.zeros((sum(arima_checked["ar_orders"]) * lags_dict["lags"], + sum(arima_checked["ar_orders"]) * lags_dict["lags"])) if other_returned["ar_polynomial_matrix"].shape[0] > 1: # Set diagonal elements to 1 except first row/col other_returned["ar_polynomial_matrix"][1:-1, 2:] = np.eye(other_returned["ar_polynomial_matrix"].shape[0]-2) - if ar_required: + if arima_checked["ar_required"]: other_returned["ar_polynomial_matrix"][:, 0] = -arima_polynomials["ar_polynomial"][1:] - other_returned["arma_parameters"] = arma_parameters + other_returned["arma_parameters"] = arima_checked["arma_parameters"] # Amend the class of state matrix - if "ts" in y_classes: - mat_vt = pd.Series( - mat_vt.T, - index=pd.date_range( - start=y.index[0] - pd.Timedelta(lags_model_max/y.index.freq), - periods=len(mat_vt.T), - freq=y.index.freq - ) - ) + if not isinstance(observations_dict.get("y_in_sample"), pd.Series): + mat_vt = pd.Series(matrices_dict['mat_vt'].T.flatten(), + index=pd.date_range(start=observations_dict["y_forecast_start"], + periods=len(matrices_dict['mat_vt'].T), + freq=observations_dict["frequency"])) else: - y_states_index = y_in_sample_index[0] - lags_model_max * np.diff(y_in_sample_index[-2:]) + \ - np.arange(lags_model_max) * np.diff(y_in_sample_index[-2:]) - y_states_index = np.concatenate([y_states_index, y_in_sample_index]) - mat_vt = pd.Series(mat_vt.T, index=y_states_index) - - parameters_number[1, 4] = np.sum(parameters_number[1, :4]) - + mat_vt = pd.Series(matrices_dict['mat_vt'].T, + index=observations_dict["y_forecast_index"]) + # Update parameters number + # There is an issue here that I need to fix with the parameters number + general_dict["parameters_number"][0][2] = np.sum(general_dict["parameters_number"][0][:2]) + + return { - "model": None, - "time_elapsed": None, - "data": np.column_stack((None, xreg_data)), - "holdout": None, - "fitted": y_fitted, - "residuals": errors, - "forecast": y_forecast, - "states": mat_vt, - "profile": profiles_recent_table, - "profile_initial": profiles_recent_initial, - "persistence": persistence, - "phi": phi, - "transition": mat_f, - "measurement": mat_wt, - "initial": initial_value, - "initial_type": initial_type, - "initial_estimated": initial_estimated, - "orders": orders, - "arma": arma_parameters_list, - "constant": constant_value, - "n_param": parameters_number, - "occurrence": oes_model, - "formula": formula, - "regressors": regressors, - "loss": loss, - "loss_value": cf_value, - "log_lik": log_lik_adam_value, - "distribution": distribution, - "scale": scale, - "other": other_returned, - "B": B, - "lags": lags, - "lags_all": lags_model_all, - "FI": fi - } \ No newline at end of file + "model": model_type_dict["model"], + "time_elapsed": None, # here will count the time + #"data": np.column_stack((None, explanatory_checked["xreg_data"])), + "holdout": general_dict["holdout"], + "fitted": y_fitted, + "residuals": errors, + "forecast": y_forecast, + "states": mat_vt, + "profile": profiles_dict["profiles_recent_table"], + "profile_initial": profiles_dict["profiles_recent_initial"], + 
"persistence": persistence, + "phi": phi_dict["phi"], + "transition": matrices_dict['mat_f'], + "measurement": matrices_dict['mat_wt'], + "initial": initial_value, + "initial_type": initials_checked["initial_type"], + "initial_estimated": initial_estimated, + "orders": general_dict.get("orders"), + "arma": arma_parameters_list, + "constant": constant_value, + "n_param": general_dict["parameters_number"], + "occurrence": occurrence_dict["oes_model"], + "formula": explanatory_checked.get("formula"), + "regressors": explanatory_checked.get("regressors"), + "loss": general_dict["loss"], + "loss_value": adam_estimated["CF_value"], + "log_lik": adam_estimated["log_lik_adam_value"], + "distribution": general_dict["distribution"], + "scale": scale, + "other": other_returned, + "B": adam_estimated['B'], + "lags": lags_dict["lags"], + "lags_all": lags_dict["lags_model_all"], + "FI": general_dict.get("fi") + } \ No newline at end of file diff --git a/python/smooth/adam_general/core/utils/cost_functions.py b/python/smooth/adam_general/core/utils/cost_functions.py index 150538a8..36528809 100644 --- a/python/smooth/adam_general/core/utils/cost_functions.py +++ b/python/smooth/adam_general/core/utils/cost_functions.py @@ -1,124 +1,132 @@ import numpy as np from numpy.linalg import eigvals -from python.smooth.adam_general.core.creator import filler -from python.smooth.adam_general.core.utils.utils import measurement_inverter, scaler, calculate_likelihood, calculate_entropy, calculate_multistep_loss +from core.creator import filler +from core.utils.utils import measurement_inverter, scaler, calculate_likelihood, calculate_entropy, calculate_multistep_loss import numpy as np - - - - -def CF(B, etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal, yInSample, - ot, otLogical, occurrenceModel, obsInSample, - componentsNumberETS, componentsNumberETSSeasonal, componentsNumberETSNonSeasonal, - componentsNumberARIMA, - lags, lagsModel, lagsModelAll, lagsModelMax, - indexLookupTable, profilesRecentTable, - matVt, matWt, matF, vecG, - persistenceEstimate, persistenceLevelEstimate, persistenceTrendEstimate, - persistenceSeasonalEstimate, persistenceXregEstimate, phiEstimate, - initialType, initialEstimate, - initialLevelEstimate, initialTrendEstimate, initialSeasonalEstimate, - initialArimaEstimate, initialXregEstimate, - arimaModel, nonZeroARI, nonZeroMA, arEstimate, maEstimate, arimaPolynomials, - arOrders, iOrders, maOrders, arRequired, maRequired, armaParameters, - xregModel, xregNumber, - xregParametersMissing, xregParametersIncluded, - xregParametersEstimated, xregParametersPersistence, - constantRequired, constantEstimate, - bounds, loss, lossFunction, distribution, horizon, multisteps, - denominator=None, yDenominator=None, - other=None, otherParameterEstimate=False, lambda_param=None, +from smooth.adam_general._adam_general import adam_fitter, adam_forecaster + + +def CF(B, + model_type_dict, + components_dict, + lags_dict, + matrices_dict, + persistence_checked, + initials_checked, + arima_checked, + explanatory_checked, + phi_dict, + constants_checked, + observations_dict, + profile_dict, + general, + bounds = "usual", + other=None, otherParameterEstimate=False, arPolynomialMatrix=None, maPolynomialMatrix=None, - regressors=None): # Add regressors parameter here + regressors=None): # Fill in the matrices - adamElements = filler(B, etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal, - componentsNumberETS, componentsNumberETSNonSeasonal, - componentsNumberETSSeasonal, 
componentsNumberARIMA, - lags, lagsModel, lagsModelMax, - matVt, matWt, matF, vecG, - persistenceEstimate, persistenceLevelEstimate, persistenceTrendEstimate, - persistenceSeasonalEstimate, persistenceXregEstimate, - phiEstimate, - initialType, initialEstimate, - initialLevelEstimate, initialTrendEstimate, initialSeasonalEstimate, - initialArimaEstimate, initialXregEstimate, - arimaModel, arEstimate, maEstimate, arOrders, iOrders, maOrders, - arRequired, maRequired, armaParameters, - nonZeroARI, nonZeroMA, arimaPolynomials, - xregModel, xregNumber, - xregParametersMissing, xregParametersIncluded, - xregParametersEstimated, xregParametersPersistence, constantEstimate) + adamElements = filler(B, + model_type_dict, + components_dict, + lags_dict, + matrices_dict, + persistence_checked, + initials_checked, + arima_checked, + explanatory_checked, + phi_dict, + constants_checked) # If we estimate parameters of distribution, take it from the B vector if otherParameterEstimate: other = abs(B[-1]) - if distribution in ["dgnorm", "dlgnorm"] and other < 0.25: + if general['distribution_new'] in ["dgnorm", "dlgnorm"] and other < 0.25: return 1e10 / other + # Check the bounds, classical restrictions if bounds == "usual": - if arimaModel and any([arEstimate, maEstimate]): - if arEstimate and sum(-adamElements['arimaPolynomials']['arPolynomial'][1:]) >= 1: + if arima_checked['arima_model'] and any([arima_checked['ar_estimate'], arima_checked['ma_estimate']]): + if arima_checked['ar_estimate'] and sum(-adamElements['arimaPolynomials']['arPolynomial'][1:]) >= 1: arPolynomialMatrix[:, 0] = -adamElements['arimaPolynomials']['arPolynomial'][1:] arPolyroots = np.abs(eigvals(arPolynomialMatrix)) if any(arPolyroots > 1): return 1e100 * np.max(arPolyroots) - if maEstimate and sum(adamElements['arimaPolynomials']['maPolynomial'][1:]) >= 1: + if arima_checked['ma_estimate'] and sum(adamElements['arimaPolynomials']['maPolynomial'][1:]) >= 1: maPolynomialMatrix[:, 0] = adamElements['arimaPolynomials']['maPolynomial'][1:] maPolyroots = np.abs(eigvals(maPolynomialMatrix)) if any(maPolyroots > 1): return 1e100 * np.max(np.abs(maPolyroots)) - if etsModel: - if any(adamElements['vecG'][:componentsNumberETS] > 1) or any(adamElements['vecG'][:componentsNumberETS] < 0): + if model_type_dict['ets_model']: + if any(adamElements['vec_g'][:components_dict['components_number_ets']] > 1) or \ + any(adamElements['vec_g'][:components_dict['components_number_ets']] < 0): return 1e300 - if modelIsTrendy: - if adamElements['vecG'][1] > adamElements['vecG'][0]: + if model_type_dict['model_is_trendy']: + if adamElements['vec_g'][1] > adamElements['vec_g'][0]: return 1e300 - if modelIsSeasonal and any(adamElements['vecG'][componentsNumberETSNonSeasonal:componentsNumberETSNonSeasonal+componentsNumberETSSeasonal] > (1 - adamElements['vecG'][0])): + if model_type_dict['model_is_seasonal'] and \ + any(adamElements['vec_g'][components_dict['components_number_ets_non_seasonal']: + components_dict['components_number_ets_non_seasonal'] + + components_dict['components_number_ets_seasonal']] > (1 - adamElements['vec_g'][0])): return 1e300 - elif modelIsSeasonal and any(adamElements['vecG'][componentsNumberETSNonSeasonal:componentsNumberETSNonSeasonal+componentsNumberETSSeasonal] > (1 - adamElements['vecG'][0])): + elif model_type_dict['model_is_seasonal'] and \ + any(adamElements['vec_g'][components_dict['components_number_ets_non_seasonal']: + components_dict['components_number_ets_non_seasonal'] + + 
components_dict['components_number_ets_seasonal']] > (1 - adamElements['vec_g'][0])): return 1e300 - if phiEstimate and (adamElements['matF'][1, 1] > 1 or adamElements['matF'][1, 1] < 0): + if phi_dict['phi_estimate'] and (adamElements['mat_f'][1, 1] > 1 or adamElements['mat_f'][1, 1] < 0): return 1e300 - if xregModel and regressors == "adapt": - if any(adamElements['vecG'][componentsNumberETS+componentsNumberARIMA:componentsNumberETS+componentsNumberARIMA+xregNumber] > 1) or \ - any(adamElements['vecG'][componentsNumberETS+componentsNumberARIMA:componentsNumberETS+componentsNumberARIMA+xregNumber] < 0): - return 1e100 * np.max(np.abs(adamElements['vecG'][componentsNumberETS+componentsNumberARIMA:componentsNumberETS+componentsNumberARIMA+xregNumber] - 0.5)) + if explanatory_checked['xreg_model'] and regressors == "adapt": + if any(adamElements['vec_g'][components_dict['components_number_ets'] + + components_dict['components_number_arima']: + components_dict['components_number_ets'] + + components_dict['components_number_arima'] + + explanatory_checked['xreg_number']] > 1) or \ + any(adamElements['vec_g'][components_dict['components_number_ets'] + + components_dict['components_number_arima']: + components_dict['components_number_ets'] + + components_dict['components_number_arima'] + + explanatory_checked['xreg_number']] < 0): + return 1e100 * np.max(np.abs(adamElements['vec_g'][components_dict['components_number_ets'] + + components_dict['components_number_arima']: + components_dict['components_number_ets'] + + components_dict['components_number_arima'] + + explanatory_checked['xreg_number']] - 0.5)) elif bounds == "admissible": - if arimaModel: - if arEstimate and (sum(-adamElements['arimaPolynomials']['arPolynomial'][1:]) >= 1 or sum(-adamElements['arimaPolynomials']['arPolynomial'][1:]) < 0): + if arima_checked['arima_model']: + if arima_checked['ar_estimate'] and (sum(-adamElements['arimaPolynomials']['arPolynomial'][1:]) >= 1 or sum(-adamElements['arimaPolynomials']['arPolynomial'][1:]) < 0): arPolynomialMatrix[:, 0] = -adamElements['arimaPolynomials']['arPolynomial'][1:] eigenValues = np.abs(eigvals(arPolynomialMatrix)) if any(eigenValues > 1): return 1e100 * np.max(eigenValues) - if etsModel or arimaModel: - if xregModel: + if model_type_dict['ets_model'] or arima_checked['arima_model']: + if explanatory_checked['xreg_model']: if regressors == "adapt": eigenValues = np.abs(eigvals( - adamElements['matF'] - - np.diag(adamElements['vecG'].flatten()) @ - measurement_inverter(adamElements['matWt'][:obsInSample]).T @ - adamElements['matWt'][:obsInSample] / obsInSample + adamElements['mat_f'] - + np.diag(adamElements['vec_g'].flatten()) @ + measurement_inverter(adamElements['mat_wt'][:observations_dict['obs_in_sample']]).T @ + adamElements['mat_wt'][:observations_dict['obs_in_sample']] / observations_dict['obs_in_sample'] )) else: - indices = np.arange(componentsNumberETS + componentsNumberARIMA) + indices = np.arange(components_dict['components_number_ets'] + components_dict['components_number_arima']) eigenValues = np.abs(eigvals( - adamElements['matF'][np.ix_(indices, indices)] - - adamElements['vecG'][indices] @ - adamElements['matWt'][obsInSample-1, indices] + adamElements['mat_f'][np.ix_(indices, indices)] - + adamElements['vec_g'][indices] @ + adamElements['mat_wt'][observations_dict['obs_in_sample']-1, indices] )) else: - if etsModel or (arimaModel and maEstimate and (sum(adamElements['arimaPolynomials']['maPolynomial'][1:]) >= 1 or 
sum(adamElements['arimaPolynomials']['maPolynomial'][1:]) < 0)): + if model_type_dict['ets_model'] or (arima_checked['arima_model'] and arima_checked['ma_estimate'] and (sum(adamElements['arimaPolynomials']['maPolynomial'][1:]) >= 1 or sum(adamElements['arimaPolynomials']['maPolynomial'][1:]) < 0)): eigenValues = np.abs(eigvals( - adamElements['matF'] - - adamElements['vecG'] @ adamElements['matWt'][obsInSample-1] + adamElements['mat_f'] - + adamElements['vec_g'] @ adamElements['mat_wt'][observations_dict['obs_in_sample']-1] )) else: eigenValues = np.array([0]) @@ -127,233 +135,267 @@ def CF(B, etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal, yInSamp return 1e100 * np.max(eigenValues) # Write down the initials in the recent profile - profilesRecentTable[:] = adamElements['matVt'][:, :lagsModelMax] + profile_dict['profiles_recent_table'][:] = adamElements['mat_vt'][:, :lags_dict['lags_model_max']] # Fitter and the losses calculation - adamFitted = adamFitterWrap(adamElements['matVt'], adamElements['matWt'], adamElements['matF'], adamElements['vecG'], - lagsModelAll, indexLookupTable, profilesRecentTable, - Etype, Ttype, Stype, componentsNumberETS, componentsNumberETSSeasonal, - componentsNumberARIMA, xregNumber, constantRequired, - yInSample, ot, any([t == "complete" or t == "backcasting" for t in initialType])) - - if not multisteps: - if loss == "likelihood": - scale = scaler(distribution, Etype, adamFitted['errors'][otLogical], - adamFitted['yFitted'][otLogical], obsInSample, other) + adam_fitted = adam_fitter(adamElements['mat_vt'], + adamElements['mat_wt'], + adamElements['mat_f'], + adamElements['vec_g'], + lags_dict['lags_model_all'], + profile_dict['index_lookup_table'], + profile_dict['profiles_recent_table'], + model_type_dict['error_type'], + model_type_dict['trend_type'], + model_type_dict['season_type'], + components_dict['components_number_ets'], + components_dict['components_number_ets_seasonal'], + components_dict['components_number_arima'], + explanatory_checked['xreg_number'], + constants_checked['constant_required'], + observations_dict['y_in_sample'], + observations_dict['ot'], + any([t == "complete" or t == "backcasting" for t in initials_checked['initial_type']])) + + if not general['multisteps']: + if general['loss'] == "likelihood": + scale = scaler(general['distribution_new'], + model_type_dict['error_type'], + adam_fitted['errors'][observations_dict['ot_logical']], + adam_fitted['yFitted'][observations_dict['ot_logical']], + observations_dict['obs_in_sample'], + other) # Calculate the likelihood - CFValue = -np.sum(calculate_likelihood(distribution, Etype, yInSample[otLogical], - adamFitted['yFitted'][otLogical], scale, other)) + CFValue = -np.sum(calculate_likelihood(general['distribution_new'], + model_type_dict['error_type'], + observations_dict['y_in_sample'][observations_dict['ot_logical']], + adam_fitted['yFitted'][observations_dict['ot_logical']], + scale, + other)) # Differential entropy for the logLik of occurrence model - if occurrenceModel or any(~otLogical): - CFValueEntropy = calculate_entropy(distribution, scale, other, obsZero, - adamFitted['yFitted'][~otLogical]) + if observations_dict.get('occurrence_model', False) or any(~observations_dict['ot_logical']): + CFValueEntropy = calculate_entropy(general['distribution_new'], + scale, + other, + observations_dict['obs_zero'], + adam_fitted['yFitted'][~observations_dict['ot_logical']]) if np.isnan(CFValueEntropy) or CFValueEntropy < 0: CFValueEntropy = np.inf CFValue += CFValueEntropy - 
elif loss == "MSE": - CFValue = np.sum(adamFitted['errors']**2) / obsInSample - elif loss == "MAE": - CFValue = np.sum(np.abs(adamFitted['errors'])) / obsInSample - elif loss == "HAM": - CFValue = np.sum(np.sqrt(np.abs(adamFitted['errors']))) / obsInSample - elif loss in ["LASSO", "RIDGE"]: - persistenceToSkip = componentsNumberETS + persistenceXregEstimate * xregNumber + \ - phiEstimate + sum(arOrders) + sum(maOrders) - - if phiEstimate: - B[componentsNumberETS + persistenceXregEstimate * xregNumber] = \ - 1 - B[componentsNumberETS + persistenceXregEstimate * xregNumber] - - j = componentsNumberETS + persistenceXregEstimate * xregNumber + phiEstimate - - if arimaModel and (sum(maOrders) > 0 or sum(arOrders) > 0): - for i in range(len(lags)): - B[j:j+arOrders[i]] = 1 - B[j:j+arOrders[i]] - j += arOrders[i] + maOrders[i] - - if any([t == "optimal" or t == "backcasting" for t in initialType]): - if xregNumber > 0: + elif general['loss'] == "MSE": + CFValue = np.sum(adam_fitted['errors']**2) / observations_dict['obs_in_sample'] + elif general['loss'] == "MAE": + CFValue = np.sum(np.abs(adam_fitted['errors'])) / observations_dict['obs_in_sample'] + elif general['loss'] == "HAM": + CFValue = np.sum(np.sqrt(np.abs(adam_fitted['errors']))) / observations_dict['obs_in_sample'] + elif general['loss'] in ["LASSO", "RIDGE"]: + persistenceToSkip = (components_dict['components_number_ets'] + + persistence_checked['persistence_xreg_estimate'] * explanatory_checked['xreg_number'] + + phi_dict['phi_estimate'] + + sum(arima_checked['ar_orders']) + + sum(arima_checked['ma_orders'])) + + if phi_dict['phi_estimate']: + B[components_dict['components_number_ets'] + + persistence_checked['persistence_xreg_estimate'] * explanatory_checked['xreg_number']] = \ + 1 - B[components_dict['components_number_ets'] + + persistence_checked['persistence_xreg_estimate'] * explanatory_checked['xreg_number']] + + j = (components_dict['components_number_ets'] + + persistence_checked['persistence_xreg_estimate'] * explanatory_checked['xreg_number'] + + phi_dict['phi_estimate']) + + if arima_checked['arima_model'] and (sum(arima_checked['ma_orders']) > 0 or sum(arima_checked['ar_orders']) > 0): + for i in range(len(lags_dict['lags'])): + B[j:j+arima_checked['ar_orders'][i]] = 1 - B[j:j+arima_checked['ar_orders'][i]] + j += arima_checked['ar_orders'][i] + arima_checked['ma_orders'][i] + + if any([t == "optimal" or t == "backcasting" for t in initials_checked['initial_type']]): + if explanatory_checked['xreg_number'] > 0: B = np.concatenate([B[:persistenceToSkip], - B[-xregNumber:] / denominator if Etype == "A" else B[-xregNumber:]]) + B[-explanatory_checked['xreg_number']:] / general['denominator'] + if model_type_dict['error_type'] == "A" + else B[-explanatory_checked['xreg_number']:]]) else: B = B[:persistenceToSkip] - if Etype == "A": - CFValue = (1 - lambda_param) * np.sqrt(np.sum((adamFitted['errors'] / yDenominator)**2) / obsInSample) + if model_type_dict['error_type'] == "A": + CFValue = ((1 - general['lambda']) * + np.sqrt(np.sum((adam_fitted['errors'] / general['y_denominator'])**2) / + observations_dict['obs_in_sample'])) else: # "M" - CFValue = (1 - lambda_param) * np.sqrt(np.sum(np.log(1 + adamFitted['errors'])**2) / obsInSample) + CFValue = ((1 - general['lambda']) * + np.sqrt(np.sum(np.log(1 + adam_fitted['errors'])**2) / + observations_dict['obs_in_sample'])) - if loss == "LASSO": - CFValue += lambda_param * np.sum(np.abs(B)) + if general['loss'] == "LASSO": + CFValue += general['lambda'] * np.sum(np.abs(B)) else: # 
"RIDGE" - CFValue += lambda_param * np.sqrt(np.sum(B**2)) - - elif loss == "custom": - CFValue = lossFunction(actual=yInSample, fitted=adamFitted['yFitted'], B=B) - else: - adamErrors = adamErrorerWrap( - adamFitted['matVt'], adamElements['matWt'], adamElements['matF'], - lagsModelAll, indexLookupTable, profilesRecentTable, - Etype, Ttype, Stype, - componentsNumberETS, componentsNumberETSSeasonal, - componentsNumberARIMA, xregNumber, constantRequired, horizon, - yInSample, ot - ) - - CFValue = calculate_multistep_loss(loss, adamErrors, obsInSample, horizon) - + CFValue += general['lambda'] * np.sqrt(np.sum(B**2)) + + elif general['loss'] == "custom": + CFValue = general['loss_function'](actual=observations_dict['y_in_sample'], + fitted=adam_fitted['yFitted'], + B=B) + #else: + # currently no multistep loss function + + #adam_errors = adam_errorer_wrap( + # adam_fitted['matVt'], adamElements['matWt'], adamElements['matF'], + # lags_dict['lags_model_all'], index_lookup_table, profiles_recent_table, + # model_type_dict['error_type'], model_type_dict['trend_type'], model_type_dict['season_type'], + # components_dict['components_number_ets'], components_dict['components_number_ets_seasonal'], + # components_dict['components_number_arima'], explanatory_checked['xreg_number'], constants_checked['constant_required'], general['horizon'], + # observations_dict['y_in_sample'], observations_dict['ot']) + + #CFValue = calculate_multistep_loss(general['loss'], adamErrors, observations_dict['obs_in_sample'], general['horizon']) if np.isnan(CFValue): CFValue = 1e300 return CFValue -def logLikADAM(B, etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal, yInSample, - ot, otLogical, occurrenceModel, pFitted, obsInSample, - componentsNumberETS, componentsNumberETSSeasonal, componentsNumberETSNonSeasonal, - componentsNumberARIMA, lags, lagsModel, lagsModelAll, lagsModelMax, - indexLookupTable, profilesRecentTable, matVt, matWt, matF, vecG, - persistenceEstimate, persistenceLevelEstimate, persistenceTrendEstimate, - persistenceSeasonalEstimate, persistenceXregEstimate, phiEstimate, - initialType, initialEstimate, initialLevelEstimate, initialTrendEstimate, - initialSeasonalEstimate, initialArimaEstimate, initialXregEstimate, - arimaModel, nonZeroARI, nonZeroMA, arEstimate, maEstimate, arimaPolynomials, - arOrders, iOrders, maOrders, arRequired, maRequired, armaParameters, - xregModel, xregNumber, xregParametersMissing, xregParametersIncluded, - xregParametersEstimated, xregParametersPersistence, constantRequired, - constantEstimate, bounds, loss, lossFunction, distribution, horizon, - multisteps, denominator=None, yDenominator=None, other=None, - otherParameterEstimate=False, lambda_param=None, arPolynomialMatrix=None, - maPolynomialMatrix=None, hessianCalculation=False): + +def log_Lik_ADAM( + B, + model_type_dict, + components_dict, + lags_dict, + adam_created, + persistence_dict, + initials_dict, + arima_dict, + explanatory_dict, + phi_dict, + constant_dict, + observations_dict, + occurrence_dict, + general_dict, + profile_dict, + multisteps = False +): if not multisteps: - if loss in ["LASSO", "RIDGE"]: + #print(profile_dict) + if general_dict['loss'] in ["LASSO", "RIDGE"]: return 0 else: - distributionNew = { + general_dict['distribution_new'] = { "MSE": "dnorm", "MAE": "dlaplace", "HAM": "ds" - }.get(loss, distribution) + }.get(general_dict['loss'], general_dict['distribution_new']) - lossNew = "likelihood" if loss in ["MSE", "MAE", "HAM"] else loss + general_dict['loss_new'] = "likelihood" if 
general_dict['loss'] in ["MSE", "MAE", "HAM"] else general_dict['loss'] # Call CF function with bounds="none" - logLikReturn = -CF(B, etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal, - yInSample, ot, otLogical, occurrenceModel, obsInSample, - componentsNumberETS, componentsNumberETSSeasonal, - componentsNumberETSNonSeasonal, componentsNumberARIMA, - lags, lagsModel, lagsModelAll, lagsModelMax, - indexLookupTable, profilesRecentTable, matVt, matWt, matF, vecG, - persistenceEstimate, persistenceLevelEstimate, - persistenceTrendEstimate, persistenceSeasonalEstimate, - persistenceXregEstimate, phiEstimate, initialType, - initialEstimate, initialLevelEstimate, initialTrendEstimate, - initialSeasonalEstimate, initialArimaEstimate, - initialXregEstimate, arimaModel, nonZeroARI, nonZeroMA, - arEstimate, maEstimate, arimaPolynomials, arOrders, iOrders, - maOrders, arRequired, maRequired, armaParameters, xregModel, - xregNumber, xregParametersMissing, xregParametersIncluded, - xregParametersEstimated, xregParametersPersistence, - constantRequired, constantEstimate, bounds="none", loss=lossNew, - lossFunction=lossFunction, distribution=distributionNew, - horizon=horizon, multisteps=multisteps, denominator=denominator, - yDenominator=yDenominator, other=other, - otherParameterEstimate=otherParameterEstimate, - lambda_param=lambda_param, arPolynomialMatrix=arPolynomialMatrix, - maPolynomialMatrix=maPolynomialMatrix) + logLikReturn = -CF(B, model_type_dict, + components_dict, + lags_dict, + adam_created, + persistence_dict, + initials_dict, + arima_dict, + explanatory_dict, + phi_dict, + constant_dict, + observations_dict, + profile_dict, + general_dict, + bounds = None) # Handle occurrence model - if occurrenceModel: + if occurrence_dict['occurrence_model']: if np.isinf(logLikReturn): logLikReturn = 0 - if any(1 - pFitted[~otLogical] == 0) or any(pFitted[otLogical] == 0): - ptNew = pFitted[(pFitted != 0) & (pFitted != 1)] - otNew = ot[(pFitted != 0) & (pFitted != 1)] - if len(ptNew) == 0: + if any(1 - occurrence_dict['p_fitted'][~observations_dict['ot_logical']] == 0) or any(occurrence_dict['p_fitted'][observations_dict['ot_logical']] == 0): + pt_new = occurrence_dict['p_fitted'][(occurrence_dict['p_fitted'] != 0) & (occurrence_dict['p_fitted'] != 1)] + ot_new = observations_dict['ot'][(occurrence_dict['p_fitted'] != 0) & (occurrence_dict['p_fitted'] != 1)] + if len(pt_new) == 0: return logLikReturn else: - return logLikReturn + np.sum(np.log(ptNew[otNew == 1])) + np.sum(np.log(1 - ptNew[otNew == 0])) + return logLikReturn + np.sum(np.log(pt_new[ot_new == 1])) + np.sum(np.log(1 - pt_new[ot_new == 0])) else: - return logLikReturn + np.sum(np.log(pFitted[otLogical])) + np.sum(np.log(1 - pFitted[~otLogical])) + return logLikReturn + np.sum(np.log(occurrence_dict['p_fitted'][observations_dict['ot_logical']])) + np.sum(np.log(1 - occurrence_dict['p_fitted'][~observations_dict['ot_logical']])) else: return logLikReturn else: # Call CF function with bounds="none" - logLikReturn = CF(B, etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal, - yInSample, ot, otLogical, occurrenceModel, obsInSample, - componentsNumberETS, componentsNumberETSSeasonal, - componentsNumberETSNonSeasonal, componentsNumberARIMA, - lags, lagsModel, lagsModelAll, lagsModelMax, - indexLookupTable, profilesRecentTable, matVt, matWt, matF, vecG, - persistenceEstimate, persistenceLevelEstimate, - persistenceTrendEstimate, persistenceSeasonalEstimate, - persistenceXregEstimate, phiEstimate, initialType, - 
initialEstimate, initialLevelEstimate, initialTrendEstimate, - initialSeasonalEstimate, initialArimaEstimate, - initialXregEstimate, arimaModel, nonZeroARI, nonZeroMA, - arEstimate, maEstimate, arimaPolynomials, arOrders, iOrders, - maOrders, arRequired, maRequired, armaParameters, xregModel, - xregNumber, xregParametersMissing, xregParametersIncluded, - xregParametersEstimated, xregParametersPersistence, - constantRequired, constantEstimate, bounds="none", loss=loss, - lossFunction=lossFunction, distribution=distribution, - horizon=horizon, multisteps=multisteps, denominator=denominator, - yDenominator=yDenominator, other=other, - otherParameterEstimate=otherParameterEstimate, - lambda_param=lambda_param, arPolynomialMatrix=arPolynomialMatrix, - maPolynomialMatrix=maPolynomialMatrix) + logLikReturn = CF(B, + model_type_dict, + components_dict, + lags_dict, + adam_created, + persistence_dict, + initials_dict, + arima_dict, + explanatory_dict, + phi_dict, + constant_dict, + observations_dict, + profile_dict, + general_dict, + bounds = None + ) # Concentrated log-likelihoods for the multistep losses - if loss in ["MSEh", "aMSEh", "TMSE", "aTMSE", "MSCE", "aMSCE"]: - logLikReturn = -(obsInSample - horizon) / 2 * (np.log(2 * np.pi) + 1 + np.log(logLikReturn)) - elif loss in ["GTMSE", "aGTMSE"]: - logLikReturn = -(obsInSample - horizon) / 2 * (np.log(2 * np.pi) + 1 + logLikReturn) - elif loss in ["MAEh", "TMAE", "GTMAE", "MACE"]: - logLikReturn = -(obsInSample - horizon) * (np.log(2) + 1 + np.log(logLikReturn)) - elif loss in ["HAMh", "THAM", "GTHAM", "CHAM"]: - logLikReturn = -(obsInSample - horizon) * (np.log(4) + 2 + 2 * np.log(logLikReturn)) - elif loss in ["GPL", "aGPL"]: - logLikReturn = -(obsInSample - horizon) / 2 * (horizon * np.log(2 * np.pi) + horizon + logLikReturn) / horizon + if general_dict['loss'] in ["MSEh", "aMSEh", "TMSE", "aTMSE", "MSCE", "aMSCE"]: + # is horizon different than h? 
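[review note] To answer the question in the comment above: yes, in this port general_dict['h'] plays the role of R's horizon throughout, so using it here is consistent. These branches are the concentrated log-likelihoods of the multistep estimators; for instance, for the MSEh family the value computed below is -((T - h) / 2) * (log(2 * pi) + 1 + log(CF)), with T = obs_in_sample and CF the multistep loss value returned by CF().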
+            logLikReturn = -(observations_dict['obs_in_sample'] - general_dict['h']) / 2 * (np.log(2 * np.pi) + 1 + np.log(logLikReturn))
+        elif general_dict['loss'] in ["GTMSE", "aGTMSE"]:
+            logLikReturn = -(observations_dict['obs_in_sample'] - general_dict['h']) / 2 * (np.log(2 * np.pi) + 1 + logLikReturn)
+        elif general_dict['loss'] in ["MAEh", "TMAE", "GTMAE", "MACE"]:
+            logLikReturn = -(observations_dict['obs_in_sample'] - general_dict['h']) * (np.log(2) + 1 + np.log(logLikReturn))
+        elif general_dict['loss'] in ["HAMh", "THAM", "GTHAM", "CHAM"]:
+            logLikReturn = -(observations_dict['obs_in_sample'] - general_dict['h']) * (np.log(4) + 2 + 2 * np.log(logLikReturn))
+        elif general_dict['loss'] in ["GPL", "aGPL"]:
+            logLikReturn = -(observations_dict['obs_in_sample'] - general_dict['h']) / 2 * (general_dict['h'] * np.log(2 * np.pi) + general_dict['h'] + logLikReturn) / general_dict['h']

         # Make likelihood comparable
-        logLikReturn = logLikReturn / (obsInSample - horizon) * obsInSample
+        logLikReturn = logLikReturn / (observations_dict['obs_in_sample'] - general_dict['h']) * observations_dict['obs_in_sample']

         # Handle multiplicative model
-        if Etype == "M":
+        if model_type_dict['ets_model'] and model_type_dict['error_type'] == "M":
             # Fill in the matrices
-            adamElements = filler(B, etsModel, Etype, Ttype, Stype, modelIsTrendy, modelIsSeasonal,
-                                  componentsNumberETS, componentsNumberETSNonSeasonal,
-                                  componentsNumberETSSeasonal, componentsNumberARIMA,
-                                  lags, lagsModel, lagsModelMax, matVt, matWt, matF, vecG,
-                                  persistenceEstimate, persistenceLevelEstimate,
-                                  persistenceTrendEstimate, persistenceSeasonalEstimate,
-                                  persistenceXregEstimate, phiEstimate, initialType,
-                                  initialEstimate, initialLevelEstimate, initialTrendEstimate,
-                                  initialSeasonalEstimate, initialArimaEstimate,
-                                  initialXregEstimate, arimaModel, arEstimate, maEstimate,
-                                  arOrders, iOrders, maOrders, arRequired, maRequired,
-                                  armaParameters, nonZeroARI, nonZeroMA, arimaPolynomials,
-                                  xregModel, xregNumber, xregParametersMissing,
-                                  xregParametersIncluded, xregParametersEstimated,
-                                  xregParametersPersistence, constantEstimate)
+            adam_elements = filler(B,
+                                   model_type_dict,
+                                   components_dict,
+                                   lags_dict,
+                                   adam_created,
+                                   persistence_dict,
+                                   initials_dict,
+                                   arima_dict,
+                                   explanatory_dict,
+                                   phi_dict,
+                                   constant_dict)

             # Write down the initials in the recent profile
-            profilesRecentTable[:] = adamElements['matVt'][:, :lagsModelMax]
+            profile_dict['profiles_recent_table'][:] = adam_elements['mat_vt'][:, :lags_dict['lags_model_max']]

             # Fit the model again to extract the fitted values
-            adamFitted = adamFitterWrap(adamElements['matVt'], adamElements['matWt'],
-                                        adamElements['matF'], adamElements['vecG'],
-                                        lagsModelAll, indexLookupTable, profilesRecentTable,
-                                        Etype, Ttype, Stype, componentsNumberETS,
-                                        componentsNumberETSSeasonal, componentsNumberARIMA,
-                                        xregNumber, constantRequired, yInSample, ot,
-                                        any(t in ["complete", "backcasting"] for t in initialType))
+            adam_fitted = adam_fitter(adam_elements['mat_vt'],
+                                      adam_elements['mat_wt'],
+                                      adam_elements['mat_f'],
+                                      adam_elements['vec_g'],
+                                      lags_dict['lags_model_all'],
+                                      profile_dict['index_lookup_table'],
+                                      profile_dict['profiles_recent_table'],
+                                      model_type_dict['error_type'],
+                                      model_type_dict['trend_type'],
+                                      model_type_dict['season_type'],
+                                      components_dict['components_number_ets'],
+                                      components_dict['components_number_ets_seasonal'],
+                                      components_dict['components_number_arima'],
+                                      explanatory_dict['xreg_number'],
+                                      constant_dict['constant_required'],
+                                      observations_dict['y_in_sample'],
+                                      observations_dict['ot'],
+                                      any([t == "complete" or t == "backcasting" for t in initials_dict['initial_type']]))
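[review note] The subtraction below is the Jacobian correction for multiplicative-error models: the likelihood is evaluated on the relative errors, so each observation contributes an extra -log|fitted_t| term, i.e. logLik_M = logLik_A - sum(log|y_fitted_t|). That is also why the model has to be refitted here: the fitted values are needed for that sum.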
-            logLikReturn -= np.sum(np.log(np.abs(adamFitted['yFitted'])))
+            logLikReturn -= np.sum(np.log(np.abs(adam_fitted['yFitted'])))

         return logLikReturn
\ No newline at end of file
diff --git a/python/smooth/adam_general/core/utils/dump.py b/python/smooth/adam_general/core/utils/dump.py
new file mode 100644
index 00000000..8a509dfd
--- /dev/null
+++ b/python/smooth/adam_general/core/utils/dump.py
@@ -0,0 +1,71 @@
+# estimator commented out lines 2754 to 2821
+adam_created_arima = filler(
+    b_values['B'],
+    ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal,
+    components_number_ets, components_number_ets_non_seasonal,
+    components_number_ets_seasonal, components_number_arima,
+    lags, lags_model, lags_model_max,
+    adam_created['mat_vt'], adam_created['mat_wt'], adam_created['mat_f'], adam_created['vec_g'],
+    persistence_estimate, persistence_level_estimate, persistence_trend_estimate,
+    persistence_seasonal_estimate, persistence_xreg_estimate,
+    phi_estimate,
+    initial_type, initial_estimate,
+    initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate,
+    initial_arima_estimate, initial_xreg_estimate,
+    arima_model, ar_estimate, ma_estimate, ar_orders, i_orders, ma_orders,
+    ar_required, ma_required, arma_parameters,
+    non_zero_ari, non_zero_ma, adam_created['arima_polynomials'],
+    xreg_model, xreg_number,
+    xreg_parameters_missing, xreg_parameters_included,
+    xreg_parameters_estimated, xreg_parameters_persistence, constant_estimate
+)
+
+# Write down the initials in the recent profile
+profiles_recent_table[:] = adam_created_arima['mat_vt'][:, :lags_model_max]
+
+# Do initial fit to get the state values from the backcasting
+adam_fitted = adam_fitter_wrap(
+    adam_created_arima['mat_vt'], adam_created_arima['mat_wt'], adam_created_arima['mat_f'], adam_created_arima['vec_g'],
+    lags_model_all, index_lookup_table, profiles_recent_table,
+    e_type, t_type, s_type, components_number_ets, components_number_ets_seasonal,
+    components_number_arima, xreg_number, constant_required,
+    y_in_sample, ot, True
+)
+
+adam_created['mat_vt'][:, :lags_model_max] = adam_fitted['mat_vt'][:, :lags_model_max]
+
+# Produce new initials
+b_values_new = initialiser(
+    ets_model, e_type, t_type, s_type, model_is_trendy, model_is_seasonal,
+    components_number_ets_non_seasonal, components_number_ets_seasonal, components_number_ets,
+    lags, lags_model, lags_model_seasonal, lags_model_arima, lags_model_max,
+    adam_created['mat_vt'],
+    persistence_estimate, persistence_level_estimate, persistence_trend_estimate,
+    persistence_seasonal_estimate, persistence_xreg_estimate,
+    phi_estimate, initial_type, initial_estimate,
+    initial_level_estimate, initial_trend_estimate, initial_seasonal_estimate,
+    initial_arima_estimate, initial_xreg_estimate,
+    arima_model, ar_required, ma_required, ar_estimate, ma_estimate, ar_orders, ma_orders,
+    components_number_arima, components_names_arima, initial_arima_number,
+    xreg_model, xreg_number,
+    xreg_parameters_estimated, xreg_parameters_persistence,
+    constant_estimate, constant_name, other_parameter_estimate
+)
+B = b_values_new['B']
+
+# Failsafe, just in case if the initial values contain NA / NaN
+B[np.isnan(B)] = b_values['B'][np.isnan(B)]
+
+
+# Fix for mixed ETS models producing negative values
+if (e_type == "M" and any(t in ["A", "Ad"] for t in [t_type, s_type]) or
+        t_type == "M" and any(t in ["A", "Ad"] for t in [e_type, s_type]) or
+        s_type == "M" and any(t in ["A", "Ad"] for t in [e_type, t_type])):
+    if e_type == "M" and ("level" in B) and (B["level"] <= 0):
+        B["level"] = y_in_sample[0]
+    if t_type == "M" and ("trend" in B) and (B["trend"] <= 0):
+        B["trend"] = 1
+    seasonal_params = [p for p in B.keys() if p.startswith("seasonal")]
+    if s_type == "M" and any(B[p] <= 0 for p in seasonal_params):
+        for p in seasonal_params:
+            if B[p] <= 0:
+                B[p] = 1
\ No newline at end of file
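[review note] The failsafe above mirrors the R behaviour for mixed ETS models: a non-positive value in a multiplicative component would make the log() calls in the likelihood undefined, so the level is reset to the first in-sample observation and multiplicative trend/seasonal terms to their identity value of 1.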
diff --git a/python/smooth/adam_general/core/utils/ic.py b/python/smooth/adam_general/core/utils/ic.py
index fef6c4e2..62ac7608 100644
--- a/python/smooth/adam_general/core/utils/ic.py
+++ b/python/smooth/adam_general/core/utils/ic.py
@@ -114,7 +114,7 @@ def BICc(loglik, nobs=None, df=None):
     bic = BIC(loglik, nobs, df)
     return bic + (np.log(nobs) * df * (df + 1)) / (nobs - df - 1)

-def ic_function(ic_name):
+def ic_function(ic_name, loglik):
     """
     Select information criterion function based on name
@@ -128,11 +128,14 @@ def ic_function(ic_name):
     """
+    value = loglik['value']
+    nobs = loglik['nobs']
+    df = loglik['df']
     ic_functions = {
-        'AIC': AIC,
-        'AICc': AICc,
-        'BIC': BIC,
-        'BICc': BICc
+        'AIC': AIC(value, nobs, df),
+        'AICc': AICc(value, nobs, df),
+        'BIC': BIC(value, nobs, df),
+        'BICc': BICc(value, nobs, df)
     }

     if ic_name not in ic_functions:
diff --git a/python/smooth/adam_general/core/utils/likelihood.py b/python/smooth/adam_general/core/utils/likelihood.py
new file mode 100644
index 00000000..e69de29b
diff --git a/python/smooth/adam_general/core/utils/polynomials.py b/python/smooth/adam_general/core/utils/polynomials.py
new file mode 100644
index 00000000..df94f568
--- /dev/null
+++ b/python/smooth/adam_general/core/utils/polynomials.py
@@ -0,0 +1,8 @@
+def adam_polynomialiser(parameters, ar_orders, i_orders, ma_orders,
+                        ar_estimate, ma_estimate, arma_parameters, lags):
+    """
+    Creates polynomials for ARIMA models.
+    """
+    # Implementation of adam_polynomialiser goes here
+    # You'll need to move this function from wherever it's currently defined
+    pass
\ No newline at end of file
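[review note] polynomials.py only ships a stub. For reference, below is a minimal standalone sketch of what smooth's adamPolynomialiser computes: the product of one AR/I/MA polynomial per lag in the backshift operator B. The 'arPolynomial'/'maPolynomial' keys match what cost_functions.py already reads from adamElements['arimaPolynomials']; the signature, the 'iPolynomial'/'ariPolynomial' keys and the rest are illustrative assumptions, not the final implementation (in particular, the R version can also pull unestimated coefficients out of arma_parameters, which this sketch omits):

    import numpy as np

    def adam_polynomialiser_sketch(phi, theta, ar_orders, i_orders, ma_orders, lags):
        """Multiply per-lag AR/I/MA polynomials; index k holds the coefficient of B^k."""
        def lag_polynomial(coefs, lag, sign):
            # builds (1 + sign*c1*B^lag + sign*c2*B^(2*lag) + ...)
            poly = np.zeros(len(coefs) * lag + 1)
            poly[0] = 1.0
            for j, c in enumerate(coefs, start=1):
                poly[j * lag] = sign * c
            return poly

        phi, theta = list(phi), list(theta)
        ar_poly = i_poly = ma_poly = np.array([1.0])
        for lag, p, d, q in zip(lags, ar_orders, i_orders, ma_orders):
            if p:  # AR: (1 - phi_1 B^lag - ... - phi_p B^(p*lag))
                ar_poly = np.convolve(ar_poly, lag_polynomial([phi.pop(0) for _ in range(p)], lag, -1.0))
            for _ in range(d):  # I: (1 - B^lag)^d
                i_poly = np.convolve(i_poly, lag_polynomial([1.0], lag, -1.0))
            if q:  # MA: (1 + theta_1 B^lag + ... + theta_q B^(q*lag))
                ma_poly = np.convolve(ma_poly, lag_polynomial([theta.pop(0) for _ in range(q)], lag, +1.0))
        return {"arPolynomial": ar_poly, "iPolynomial": i_poly,
                "maPolynomial": ma_poly, "ariPolynomial": np.convolve(ar_poly, i_poly)}

    # e.g. ARIMA(1,1,1)(0,1,1)_12:
    # adam_polynomialiser_sketch([0.5], [0.3, 0.1], [1, 0], [1, 1], [1, 1], [1, 12])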