diff --git a/experiments/amazon-chronos/README.md b/experiments/amazon-chronos/README.md index 4e2a2b9d..3e292051 100644 --- a/experiments/amazon-chronos/README.md +++ b/experiments/amazon-chronos/README.md @@ -1,6 +1,10 @@ # Extended comparison of Chronos against the statistical ensemble -We present an extension to the [original comparison by Nixtla](https://github.com/Nixtla/nixtla/tree/main/experiments/amazon-chronos) of Chronos [1] against the SCUM ensemble [2]. In this analysis on over 200K unique time series across 28 datasets from Benchmark II in the Chronos paper [1], we show that **zero-shot** Chronos models perform comparably to this strong ensemble of 4 statistical models while being significantly faster on average. We follow the original study as closely as possible, including loading task definitions from GluonTS and computing metrics using utilsforecast. + +## Background +A few weeks ago, we presented a [fully reproducible experiment](https://github.com/Nixtla/nixtla/tree/main/experiments/amazon-chronos) showing that Amazon Chronos was 10% less accurate and 500% slower than training classical statistical models. The Amazon team kindly responded by extending our benchmark, confirming our results on the originally selected datasets and showing different performance on the new datasets. + +Here we present an extension to the [original comparison by Nixtla](https://github.com/Nixtla/nixtla/tree/main/experiments/amazon-chronos) of Chronos [1] against the SCUM ensemble [2]. In this analysis on over 200K unique time series across 28 datasets from Benchmark II in the Chronos paper [1], we show that **zero-shot** Chronos models perform comparably to this strong ensemble of 4 statistical models while being significantly faster on average. We follow the original study as closely as possible, including loading task definitions from GluonTS and computing metrics using utilsforecast. 
## Empirical Evaluation diff --git a/experiments/amazon-chronos/collect_results.ipynb b/experiments/amazon-chronos/collect_results.ipynb index d7c2f2c1..e2eda55d 100644 --- a/experiments/amazon-chronos/collect_results.ipynb +++ b/experiments/amazon-chronos/collect_results.ipynb @@ -2,12 +2,11 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from metaflow import Flow, Run, Step" + "import pandas as pd" ] }, { @@ -19,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -48,26 +47,29 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ - "save_to_disk = False" + "save_to_disk = False\n", + "collect_results = False" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Make sure to replace these with your run IDs!\n", - "results_all = pd.concat([\n", - " get_results(1712079941097970), # SeasonalNaive\n", - " get_results(1712079795572065), # StatisticalEnsemble\n", - " get_results(1712080010851589), # chronos_mini\n", - " get_results(1712081461874960), # chronos_large\n", - "])\n", + "if collect_results:\n", + " from metaflow import Flow, Run, Step\n", + " results_all = pd.concat([\n", + " get_results(1712079941097970), # SeasonalNaive\n", + " get_results(1712079795572065), # StatisticalEnsemble\n", + " get_results(1712080010851589), # chronos_mini\n", + " get_results(1712081461874960), # chronos_large\n", + " ])\n", "if save_to_disk:\n", " results_all.to_csv(\"results/results_all.csv\", index=False)" ] @@ -81,14 +83,15 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "results_all = []\n", - "for run in list(Flow(\"ForecastEvaluation\").runs())[:5]:\n", - " 
results_all.append(get_results(run.id))\n", - "results_all = pd.concat(results_all).dropna(subset=\"value\").drop_duplicates([\"dataset\", \"model\", \"metric\"])\n", + "if collect_results:\n", + " results_all = []\n", + " for run in list(Flow(\"ForecastEvaluation\").runs())[:5]:\n", + " results_all.append(get_results(run.id))\n", + " results_all = pd.concat(results_all).dropna(subset=\"value\").drop_duplicates([\"dataset\", \"model\", \"metric\"])\n", "if save_to_disk:\n", " results_all.to_csv(\"results/results_all.csv\", index=False)" ] @@ -102,11 +105,31 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ - "results_all = pd.read_csv(\"results/results_all.csv\")" + "results_all = pd.read_csv(\"results/results_all.csv\")\n", + "results_all = results_all.query(\"model.str.startswith('amazon')\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Patch Results modifying context length seen by the Statistical Ensemble" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "results_all = pd.concat([\n", + " results_all,\n", + " pd.read_csv(\"results/complete-results.csv\"),\n", + "])" ] }, { @@ -118,7 +141,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -127,7 +150,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -203,26 +226,26 @@ " \n", " australian_electricity_demand\n", " 1.340\n", - " 1.215\n", + " 1.115\n", " 1.184\n", " 0.882\n", " 0.098\n", - " 0.058\n", + " 0.057\n", " 0.054\n", " 0.042\n", " 0.059\n", - " 0.055\n", + " 0.051\n", " 0.051\n", " 0.040\n", - " 0.333\n", - " 12602.872\n", + " 0.360\n", + " 2716.869\n", " 4.703\n", " 2.116\n", " \n", " \n", " car_parts_without_missing\n", " 1.120\n", - " 1.050\n", + " 1.051\n", " 0.807\n", " 0.803\n", " 2.225\n", @@ 
-233,15 +256,15 @@ " 0.897\n", " 0.947\n", " 0.957\n", - " 0.944\n", - " 110.575\n", + " 0.915\n", + " 37.927\n", " 61.375\n", " 5.296\n", " \n", " \n", " cif_2016\n", " 1.289\n", - " 0.918\n", + " 0.902\n", " 0.986\n", " 1.025\n", " 0.056\n", @@ -249,30 +272,30 @@ " 0.015\n", " 0.019\n", " 0.094\n", - " 0.058\n", + " 0.057\n", " 0.074\n", " 0.076\n", - " 0.672\n", - " 83.170\n", + " 0.603\n", + " 9.755\n", " 3.744\n", " 1.532\n", " \n", " \n", " covid_deaths\n", " 7.762\n", - " 5.246\n", + " 5.248\n", " 6.540\n", " 6.555\n", " 0.116\n", - " 0.025\n", + " 0.024\n", " 0.050\n", " 0.072\n", " 0.093\n", " 0.054\n", " 0.205\n", " 0.204\n", - " 0.883\n", - " 86.333\n", + " 0.815\n", + " 22.031\n", " 32.049\n", " 3.871\n", " \n", @@ -290,72 +313,72 @@ " 0.782\n", " 0.809\n", " 0.817\n", - " 11.452\n", - " 981.563\n", + " 12.371\n", + " 874.045\n", " 8661.922\n", " 653.726\n", " \n", " \n", " ercot\n", " 0.761\n", - " 1.979\n", + " 1.356\n", " 0.578\n", " 0.585\n", " 0.039\n", - " 0.050\n", + " 0.034\n", " 0.017\n", " 0.016\n", " 0.016\n", - " 0.040\n", + " 0.027\n", " 0.012\n", " 0.012\n", - " 0.375\n", - " 5119.783\n", + " 0.397\n", + " 390.501\n", " 3.666\n", " 1.979\n", " \n", " \n", " ett_small_15min\n", " 0.768\n", - " NaN\n", + " 0.638\n", " 0.714\n", " 0.739\n", " 0.143\n", - " NaN\n", + " 0.083\n", " 0.083\n", " 0.088\n", " 0.095\n", - " NaN\n", + " 0.100\n", " 0.110\n", " 0.116\n", - " 0.422\n", - " NaN\n", + " 0.457\n", + " 5034.763\n", " 4.933\n", " 2.050\n", " \n", " \n", " ett_small_1h\n", " 0.932\n", - " 1.003\n", + " 0.852\n", " 0.737\n", " 0.805\n", " 0.153\n", - " 0.123\n", + " 0.109\n", " 0.083\n", " 0.085\n", " 0.103\n", - " 0.131\n", + " 0.117\n", " 0.091\n", " 0.100\n", - " 0.183\n", - " 4206.250\n", + " 0.209\n", + " 332.130\n", " 4.607\n", " 1.688\n", " \n", " \n", " exchange_rate\n", " 1.524\n", - " 1.429\n", + " 1.407\n", " 1.882\n", " 2.118\n", " 0.016\n", @@ -366,34 +389,34 @@ " 0.004\n", " 0.006\n", " 0.007\n", - " 0.075\n", - " 259.485\n", + 
" 0.094\n", + " 9.354\n", " 3.613\n", " 1.554\n", " \n", " \n", " fred_md\n", " 1.101\n", - " 0.489\n", + " 0.482\n", " 0.571\n", " 0.564\n", " 0.082\n", - " 0.035\n", + " 0.033\n", " 0.029\n", " 0.029\n", " 0.073\n", " 0.052\n", " 0.052\n", " 0.052\n", - " 0.872\n", - " 235.237\n", + " 0.847\n", + " 49.065\n", " 15.487\n", " 2.359\n", " \n", " \n", " hospital\n", " 0.921\n", - " 0.748\n", + " 0.749\n", " 0.810\n", " 0.815\n", " 0.083\n", @@ -404,8 +427,8 @@ " 0.086\n", " 0.093\n", " 0.093\n", - " 0.880\n", - " 78.938\n", + " 0.840\n", + " 30.615\n", " 20.415\n", " 2.852\n", " \n", @@ -423,15 +446,15 @@ " 0.716\n", " 0.818\n", " 0.827\n", - " 27.139\n", - " 15599.160\n", + " 29.344\n", + " 15082.485\n", " 8071.688\n", " 662.490\n", " \n", " \n", " nn5_daily_without_missing\n", " 1.011\n", - " 0.844\n", + " 0.842\n", " 0.824\n", " 0.900\n", " 0.535\n", @@ -442,8 +465,8 @@ " 0.105\n", " 0.103\n", " 0.112\n", - " 0.882\n", - " 235.894\n", + " 0.812\n", + " 38.450\n", " 57.016\n", " 5.834\n", " \n", @@ -461,46 +484,46 @@ " 0.060\n", " 0.058\n", " 0.058\n", - " 0.812\n", - " 51.829\n", + " 0.817\n", + " 5.758\n", " 3.854\n", " 1.515\n", " \n", " \n", " traffic\n", " 1.785\n", - " 1.163\n", + " 1.089\n", " 0.644\n", " 0.640\n", " 0.411\n", - " 0.353\n", + " 0.341\n", " 0.115\n", " 0.120\n", " 0.244\n", - " 0.179\n", + " 0.161\n", " 0.077\n", " 0.082\n", - " 2.627\n", - " 12285.943\n", + " 3.057\n", + " 4173.052\n", " 202.267\n", " 18.898\n", " \n", " \n", " weather\n", " 0.755\n", - " 0.675\n", + " 0.691\n", " 0.565\n", " 0.598\n", " 0.702\n", - " 0.207\n", + " 0.201\n", " 0.122\n", " 0.128\n", " 0.213\n", - " 0.312\n", + " 0.314\n", " 0.327\n", " 0.334\n", - " 19.922\n", - " 4159.202\n", + " 22.065\n", + " 1108.493\n", " 873.384\n", " 92.715\n", " \n", @@ -518,8 +541,8 @@ " 0.073\n", " 0.076\n", " 0.082\n", - " 0.842\n", - " 84.964\n", + " 0.826\n", + " 27.710\n", " 36.095\n", " 4.165\n", " \n", @@ -537,8 +560,8 @@ " 0.078\n", " 0.087\n", " 0.092\n", - " 0.836\n", - " 
56.230\n", + " 0.798\n", + " 7.182\n", " 6.025\n", " 1.691\n", " \n", @@ -556,15 +579,15 @@ " 0.088\n", " 0.107\n", " 0.129\n", - " 0.854\n", - " 49.356\n", + " 0.785\n", + " 5.733\n", " 3.797\n", " 1.471\n", " \n", " \n", " m3_monthly\n", " 1.146\n", - " 0.826\n", + " 0.827\n", " 0.854\n", " 0.899\n", " 0.166\n", @@ -575,8 +598,8 @@ " 0.067\n", " 0.070\n", " 0.071\n", - " 0.904\n", - " 155.557\n", + " 0.902\n", + " 69.007\n", " 79.104\n", " 7.552\n", " \n", @@ -594,8 +617,8 @@ " 0.022\n", " 0.024\n", " 0.025\n", - " 0.809\n", - " 57.059\n", + " 0.800\n", + " 7.381\n", " 5.051\n", " 1.609\n", " \n", @@ -613,15 +636,15 @@ " 0.046\n", " 0.048\n", " 0.053\n", - " 0.877\n", - " 60.618\n", + " 0.820\n", + " 12.747\n", " 14.497\n", " 2.305\n", " \n", " \n", " m3_yearly\n", " 3.172\n", - " 2.704\n", + " 2.706\n", " 3.062\n", " 3.486\n", " 0.157\n", @@ -632,8 +655,8 @@ " 0.080\n", " 0.089\n", " 0.101\n", - " 0.838\n", - " 53.385\n", + " 0.812\n", + " 8.576\n", " 9.448\n", " 1.886\n", " \n", @@ -651,15 +674,15 @@ " 0.050\n", " 0.053\n", " 0.055\n", - " 1.491\n", - " 408.369\n", + " 1.633\n", + " 331.264\n", " 2403.956\n", " 170.778\n", " \n", " \n", " m4_yearly\n", " 3.966\n", - " 3.002\n", + " 3.003\n", " 3.559\n", " 3.750\n", " 0.132\n", @@ -670,8 +693,8 @@ " 0.067\n", " 0.081\n", " 0.086\n", - " 1.280\n", - " 197.224\n", + " 1.336\n", + " 117.779\n", " 1027.831\n", " 77.065\n", " \n", @@ -689,8 +712,8 @@ " 0.092\n", " 0.121\n", " 0.132\n", - " 0.866\n", - " 343.512\n", + " 0.865\n", + " 151.140\n", " 55.717\n", " 5.705\n", " \n", @@ -708,8 +731,8 @@ " 0.072\n", " 0.081\n", " 0.090\n", - " 0.843\n", - " 69.271\n", + " 0.814\n", + " 16.846\n", " 12.613\n", " 2.191\n", " \n", @@ -727,8 +750,8 @@ " 0.184\n", " 0.207\n", " 0.245\n", - " 0.842\n", - " 51.096\n", + " 0.807\n", + " 7.067\n", " 5.982\n", " 1.653\n", " \n", @@ -740,31 +763,31 @@ "metric mase \\\n", "model SeasonalNaive StatisticalEnsemble \n", "dataset \n", - "australian_electricity_demand 1.340 1.215 \n", - 
"car_parts_without_missing 1.120 1.050 \n", - "cif_2016 1.289 0.918 \n", - "covid_deaths 7.762 5.246 \n", + "australian_electricity_demand 1.340 1.115 \n", + "car_parts_without_missing 1.120 1.051 \n", + "cif_2016 1.289 0.902 \n", + "covid_deaths 7.762 5.248 \n", "dominick 0.828 0.848 \n", - "ercot 0.761 1.979 \n", - "ett_small_15min 0.768 NaN \n", - "ett_small_1h 0.932 1.003 \n", - "exchange_rate 1.524 1.429 \n", - "fred_md 1.101 0.489 \n", - "hospital 0.921 0.748 \n", + "ercot 0.761 1.356 \n", + "ett_small_15min 0.768 0.638 \n", + "ett_small_1h 0.932 0.852 \n", + "exchange_rate 1.524 1.407 \n", + "fred_md 1.101 0.482 \n", + "hospital 0.921 0.749 \n", "m5 1.867 1.638 \n", - "nn5_daily_without_missing 1.011 0.844 \n", + "nn5_daily_without_missing 1.011 0.842 \n", "nn5_weekly 1.063 0.974 \n", - "traffic 1.785 1.163 \n", - "weather 0.755 0.675 \n", + "traffic 1.785 1.089 \n", + "weather 0.755 0.691 \n", "m1_monthly 1.314 1.034 \n", "m1_quarterly 2.078 1.594 \n", "m1_yearly 4.894 3.574 \n", - "m3_monthly 1.146 0.826 \n", + "m3_monthly 1.146 0.827 \n", "m3_other 1.474 0.727 \n", "m3_quarterly 1.425 1.164 \n", - "m3_yearly 3.172 2.704 \n", + "m3_yearly 3.172 2.706 \n", "m4_quarterly 1.602 1.142 \n", - "m4_yearly 3.966 3.002 \n", + "m4_yearly 3.966 3.003 \n", "tourism_monthly 1.631 1.441 \n", "tourism_quarterly 1.699 1.506 \n", "tourism_yearly 3.552 3.277 \n", @@ -804,22 +827,22 @@ "metric scaled_crps \\\n", "model SeasonalNaive StatisticalEnsemble \n", "dataset \n", - "australian_electricity_demand 0.098 0.058 \n", + "australian_electricity_demand 0.098 0.057 \n", "car_parts_without_missing 2.225 1.132 \n", "cif_2016 0.056 0.021 \n", - "covid_deaths 0.116 0.025 \n", + "covid_deaths 0.116 0.024 \n", "dominick 2.210 0.529 \n", - "ercot 0.039 0.050 \n", - "ett_small_15min 0.143 NaN \n", - "ett_small_1h 0.153 0.123 \n", + "ercot 0.039 0.034 \n", + "ett_small_15min 0.143 0.083 \n", + "ett_small_1h 0.153 0.109 \n", "exchange_rate 0.016 0.007 \n", - "fred_md 0.082 0.035 \n", + 
"fred_md 0.082 0.033 \n", "hospital 0.083 0.047 \n", "m5 1.457 0.516 \n", "nn5_daily_without_missing 0.535 0.146 \n", "nn5_weekly 0.256 0.077 \n", - "traffic 0.411 0.353 \n", - "weather 0.702 0.207 \n", + "traffic 0.411 0.341 \n", + "weather 0.702 0.201 \n", "m1_monthly 0.193 0.121 \n", "m1_quarterly 0.143 0.070 \n", "m1_yearly 0.150 0.092 \n", @@ -868,22 +891,22 @@ "metric smape \\\n", "model SeasonalNaive StatisticalEnsemble \n", "dataset \n", - "australian_electricity_demand 0.059 0.055 \n", + "australian_electricity_demand 0.059 0.051 \n", "car_parts_without_missing 0.310 0.897 \n", - "cif_2016 0.094 0.058 \n", + "cif_2016 0.094 0.057 \n", "covid_deaths 0.093 0.054 \n", "dominick 0.160 0.782 \n", - "ercot 0.016 0.040 \n", - "ett_small_15min 0.095 NaN \n", - "ett_small_1h 0.103 0.131 \n", + "ercot 0.016 0.027 \n", + "ett_small_15min 0.095 0.100 \n", + "ett_small_1h 0.103 0.117 \n", "exchange_rate 0.005 0.004 \n", "fred_md 0.073 0.052 \n", "hospital 0.105 0.086 \n", "m5 0.404 0.716 \n", "nn5_daily_without_missing 0.132 0.105 \n", "nn5_weekly 0.066 0.060 \n", - "traffic 0.244 0.179 \n", - "weather 0.213 0.312 \n", + "traffic 0.244 0.161 \n", + "weather 0.213 0.314 \n", "m1_monthly 0.087 0.073 \n", "m1_quarterly 0.095 0.078 \n", "m1_yearly 0.112 0.088 \n", @@ -932,34 +955,34 @@ "metric time \\\n", "model SeasonalNaive StatisticalEnsemble \n", "dataset \n", - "australian_electricity_demand 0.333 12602.872 \n", - "car_parts_without_missing 0.944 110.575 \n", - "cif_2016 0.672 83.170 \n", - "covid_deaths 0.883 86.333 \n", - "dominick 11.452 981.563 \n", - "ercot 0.375 5119.783 \n", - "ett_small_15min 0.422 NaN \n", - "ett_small_1h 0.183 4206.250 \n", - "exchange_rate 0.075 259.485 \n", - "fred_md 0.872 235.237 \n", - "hospital 0.880 78.938 \n", - "m5 27.139 15599.160 \n", - "nn5_daily_without_missing 0.882 235.894 \n", - "nn5_weekly 0.812 51.829 \n", - "traffic 2.627 12285.943 \n", - "weather 19.922 4159.202 \n", - "m1_monthly 0.842 84.964 \n", - "m1_quarterly 0.836 
56.230 \n", - "m1_yearly 0.854 49.356 \n", - "m3_monthly 0.904 155.557 \n", - "m3_other 0.809 57.059 \n", - "m3_quarterly 0.877 60.618 \n", - "m3_yearly 0.838 53.385 \n", - "m4_quarterly 1.491 408.369 \n", - "m4_yearly 1.280 197.224 \n", - "tourism_monthly 0.866 343.512 \n", - "tourism_quarterly 0.843 69.271 \n", - "tourism_yearly 0.842 51.096 \n", + "australian_electricity_demand 0.360 2716.869 \n", + "car_parts_without_missing 0.915 37.927 \n", + "cif_2016 0.603 9.755 \n", + "covid_deaths 0.815 22.031 \n", + "dominick 12.371 874.045 \n", + "ercot 0.397 390.501 \n", + "ett_small_15min 0.457 5034.763 \n", + "ett_small_1h 0.209 332.130 \n", + "exchange_rate 0.094 9.354 \n", + "fred_md 0.847 49.065 \n", + "hospital 0.840 30.615 \n", + "m5 29.344 15082.485 \n", + "nn5_daily_without_missing 0.812 38.450 \n", + "nn5_weekly 0.817 5.758 \n", + "traffic 3.057 4173.052 \n", + "weather 22.065 1108.493 \n", + "m1_monthly 0.826 27.710 \n", + "m1_quarterly 0.798 7.182 \n", + "m1_yearly 0.785 5.733 \n", + "m3_monthly 0.902 69.007 \n", + "m3_other 0.800 7.381 \n", + "m3_quarterly 0.820 12.747 \n", + "m3_yearly 0.812 8.576 \n", + "m4_quarterly 1.633 331.264 \n", + "m4_yearly 1.336 117.779 \n", + "tourism_monthly 0.865 151.140 \n", + "tourism_quarterly 0.814 16.846 \n", + "tourism_yearly 0.807 7.067 \n", "\n", "metric \n", "model amazon/chronos-t5-large amazon/chronos-t5-mini \n", @@ -994,13 +1017,27 @@ "tourism_yearly 5.982 1.653 " ] }, - "execution_count": 8, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# StatisticalEnsemble takes >24 hours to forecast on `ett_small_15min`, so the result is missing for this dataset.\n", + "original_datasets = [\n", + " \"m1_monthly\",\n", + " \"m1_quarterly\", \n", + " \"m1_yearly\", \n", + " \"m3_monthly\", \n", + " \"m3_other\", \n", + " \"m3_quarterly\",\n", + " \"m3_yearly\", \n", + " \"m4_quarterly\", \n", + " \"m4_yearly\", \n", + " \"tourism_monthly\",\n", + " \"tourism_quarterly\", \n", + " 
\"tourism_yearly\", \n", + "]\n", "dataset_order = [\n", " \"australian_electricity_demand\", \n", " \"car_parts_without_missing\",\n", @@ -1018,18 +1055,7 @@ " \"nn5_weekly\", \n", " \"traffic\", \n", " \"weather\",\n", - " \"m1_monthly\",\n", - " \"m1_quarterly\", \n", - " \"m1_yearly\", \n", - " \"m3_monthly\", \n", - " \"m3_other\", \n", - " \"m3_quarterly\",\n", - " \"m3_yearly\", \n", - " \"m4_quarterly\", \n", - " \"m4_yearly\", \n", - " \"tourism_monthly\",\n", - " \"tourism_quarterly\", \n", - " \"tourism_yearly\", \n", + " *original_datasets\n", "]\n", "table = table.reindex(dataset_order)\n", "table" @@ -1046,7 +1072,24 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "from scipy.stats import gmean\n", + "\n", + "def calculate_gmean(table: pd.DataFrame):\n", + " results = []\n", + " for metric in [\"mase\", \"scaled_crps\", \"smape\"]:\n", + " scaled = table[metric].divide(table[metric][\"SeasonalNaive\"], axis=0).fillna(1.0)\n", + " agg = pd.concat({metric: scaled.apply(gmean)})\n", + " results.append(agg)\n", + " return pd.concat(results).round(3).to_frame().T" + ] + }, + { + "cell_type": "code", + "execution_count": 48, "metadata": {}, "outputs": [ { @@ -1094,15 +1137,15 @@ " \n", " 0\n", " 1.0\n", - " 0.836\n", + " 0.809\n", " 0.81\n", " 0.845\n", " 1.0\n", - " 0.505\n", + " 0.483\n", " 0.472\n", " 0.485\n", " 1.0\n", - " 0.987\n", + " 0.964\n", " 1.034\n", " 1.085\n", " \n", @@ -1113,11 +1156,11 @@ "text/plain": [ " mase \\\n", "model SeasonalNaive StatisticalEnsemble amazon/chronos-t5-large \n", - "0 1.0 0.836 0.81 \n", + "0 1.0 0.809 0.81 \n", "\n", " scaled_crps \\\n", "model amazon/chronos-t5-mini SeasonalNaive StatisticalEnsemble \n", - "0 0.845 1.0 0.505 \n", + "0 0.845 1.0 0.483 \n", "\n", " smape \\\n", "model amazon/chronos-t5-large amazon/chronos-t5-mini SeasonalNaive \n", @@ -1125,23 +1168,16 @@ "\n", " \n", "model StatisticalEnsemble amazon/chronos-t5-large 
amazon/chronos-t5-mini \n", - "0 0.987 1.034 1.085 " + "0 0.964 1.034 1.085 " ] }, - "execution_count": 9, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from scipy.stats import gmean\n", - "\n", - "results = []\n", - "for metric in [\"mase\", \"scaled_crps\", \"smape\"]:\n", - " scaled = table[metric].divide(table[metric][\"SeasonalNaive\"], axis=0).fillna(1.0)\n", - " agg = pd.concat({metric: scaled.apply(gmean)})\n", - " results.append(agg)\n", - "pd.concat(results).round(3).to_frame().T" + "calculate_gmean(table)" ] }, { @@ -1153,7 +1189,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -1180,7 +1216,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 52, "metadata": {}, "outputs": [ { @@ -1190,34 +1226,34 @@ "\\begin{tabular}{lllllllllllllllll}\n", " & \\multicolumn{4}{r}{mase} & \\multicolumn{4}{r}{scaled_crps} & \\multicolumn{4}{r}{smape} & \\multicolumn{4}{r}{time} \\\\\n", "model & StatisticalEnsemble & Chronos (large) & Chronos (mini) & SeasonalNaive & StatisticalEnsemble & Chronos (large) & Chronos (mini) & SeasonalNaive & StatisticalEnsemble & Chronos (large) & Chronos (mini) & SeasonalNaive & StatisticalEnsemble & Chronos (large) & Chronos (mini) & SeasonalNaive \\\\\n", - "australian-electricity-demand & 1.215 & \\underline{1.184} & \\textbf{0.882} & 1.340 & 0.058 & \\underline{0.054} & \\textbf{0.042} & 0.098 & 0.055 & \\underline{0.051} & \\textbf{0.040} & 0.059 & 12602.9 & 4.7 & \\underline{2.1} & \\textbf{0.3} \\\\\n", - "car-parts-without-missing & 1.050 & \\underline{0.807} & \\textbf{0.803} & 1.120 & 1.132 & \\underline{1.059} & \\textbf{1.022} & 2.225 & \\underline{0.897} & 0.947 & 0.957 & \\textbf{0.310} & 110.6 & 61.4 & \\underline{5.3} & \\textbf{0.9} \\\\\n", - "cif-2016 & \\textbf{0.918} & \\underline{0.986} & 1.025 & 1.289 & 0.021 & \\textbf{0.015} & \\underline{0.019} & 0.056 & 
\\textbf{0.058} & \\underline{0.074} & 0.076 & 0.094 & 83.2 & 3.7 & \\underline{1.5} & \\textbf{0.7} \\\\\n", - "covid-deaths & \\textbf{5.246} & \\underline{6.540} & 6.555 & 7.762 & \\textbf{0.025} & \\underline{0.050} & 0.072 & 0.116 & \\textbf{0.054} & 0.205 & 0.204 & \\underline{0.093} & 86.3 & 32.0 & \\underline{3.9} & \\textbf{0.9} \\\\\n", - "dominick & 0.848 & \\underline{0.786} & \\textbf{0.782} & 0.828 & 0.529 & \\underline{0.414} & \\textbf{0.399} & 2.210 & \\underline{0.782} & 0.809 & 0.817 & \\textbf{0.160} & 981.6 & 8661.9 & \\underline{653.7} & \\textbf{11.5} \\\\\n", - "ercot & 1.979 & \\textbf{0.578} & \\underline{0.585} & 0.761 & 0.050 & \\underline{0.017} & \\textbf{0.016} & 0.039 & 0.040 & \\textbf{0.012} & \\underline{0.012} & 0.016 & 5119.8 & 3.7 & \\underline{2.0} & \\textbf{0.4} \\\\\n", - "ett-small-15min & N/A & \\textbf{0.714} & \\underline{0.739} & 0.768 & N/A & \\textbf{0.083} & \\underline{0.088} & 0.143 & N/A & \\underline{0.110} & 0.116 & \\textbf{0.095} & N/A & 4.9 & \\underline{2.0} & \\textbf{0.4} \\\\\n", - "ett-small-1h & 1.003 & \\textbf{0.737} & \\underline{0.805} & 0.932 & 0.123 & \\textbf{0.083} & \\underline{0.085} & 0.153 & 0.131 & \\textbf{0.091} & \\underline{0.100} & 0.103 & 4206.2 & 4.6 & \\underline{1.7} & \\textbf{0.2} \\\\\n", - "exchange-rate & \\textbf{1.429} & 1.882 & 2.118 & \\underline{1.524} & \\textbf{0.007} & 0.011 & \\underline{0.010} & 0.016 & \\textbf{0.004} & 0.006 & 0.007 & \\underline{0.005} & 259.5 & 3.6 & \\underline{1.6} & \\textbf{0.1} \\\\\n", - "fred-md & \\textbf{0.489} & 0.571 & \\underline{0.564} & 1.101 & 0.035 & \\textbf{0.029} & \\underline{0.029} & 0.082 & \\textbf{0.052} & \\underline{0.052} & 0.052 & 0.073 & 235.2 & 15.5 & \\underline{2.4} & \\textbf{0.9} \\\\\n", - "hospital & \\textbf{0.748} & \\underline{0.810} & 0.815 & 0.921 & \\textbf{0.047} & \\underline{0.059} & 0.061 & 0.083 & \\textbf{0.086} & \\underline{0.093} & 0.093 & 0.105 & 78.9 & 20.4 & \\underline{2.9} & \\textbf{0.9} 
\\\\\n", - "m5 & 1.638 & \\underline{1.433} & \\textbf{1.432} & 1.867 & \\textbf{0.516} & 0.535 & \\underline{0.532} & 1.457 & \\underline{0.716} & 0.818 & 0.827 & \\textbf{0.404} & 15599.2 & 8071.7 & \\underline{662.5} & \\textbf{27.1} \\\\\n", - "nn5-daily-without-missing & \\underline{0.844} & \\textbf{0.824} & 0.900 & 1.011 & 0.146 & \\textbf{0.119} & \\underline{0.136} & 0.535 & \\underline{0.105} & \\textbf{0.103} & 0.112 & 0.132 & 235.9 & 57.0 & \\underline{5.8} & \\textbf{0.9} \\\\\n", - "nn5-weekly & 0.974 & \\textbf{0.929} & \\underline{0.933} & 1.063 & \\textbf{0.077} & \\underline{0.078} & 0.079 & 0.256 & 0.060 & \\textbf{0.058} & \\underline{0.058} & 0.066 & 51.8 & 3.9 & \\underline{1.5} & \\textbf{0.8} \\\\\n", - "traffic & 1.163 & \\underline{0.644} & \\textbf{0.640} & 1.785 & 0.353 & \\textbf{0.115} & \\underline{0.120} & 0.411 & 0.179 & \\textbf{0.077} & \\underline{0.082} & 0.244 & 12285.9 & 202.3 & \\underline{18.9} & \\textbf{2.6} \\\\\n", - "weather & 0.675 & \\textbf{0.565} & \\underline{0.598} & 0.755 & 0.207 & \\textbf{0.122} & \\underline{0.128} & 0.702 & \\underline{0.312} & 0.327 & 0.334 & \\textbf{0.213} & 4159.2 & 873.4 & \\underline{92.7} & \\textbf{19.9} \\\\\n", - "m1-monthly & \\textbf{1.034} & \\underline{1.089} & 1.193 & 1.314 & 0.121 & \\textbf{0.106} & \\underline{0.111} & 0.193 & \\textbf{0.073} & \\underline{0.076} & 0.082 & 0.087 & 85.0 & 36.1 & \\underline{4.2} & \\textbf{0.8} \\\\\n", - "m1-quarterly & \\textbf{1.594} & \\underline{1.730} & 1.799 & 2.078 & \\textbf{0.070} & 0.089 & \\underline{0.084} & 0.143 & \\textbf{0.078} & \\underline{0.087} & 0.092 & 0.095 & 56.2 & 6.0 & \\underline{1.7} & \\textbf{0.8} \\\\\n", - "m1-yearly & \\textbf{3.574} & \\underline{4.329} & 5.160 & 4.894 & \\textbf{0.092} & \\underline{0.117} & 0.119 & 0.150 & \\textbf{0.088} & \\underline{0.107} & 0.129 & 0.112 & 49.4 & 3.8 & \\underline{1.5} & \\textbf{0.9} \\\\\n", - "m3-monthly & \\textbf{0.826} & \\underline{0.854} & 0.899 & 1.146 & 
\\textbf{0.078} & \\underline{0.083} & 0.084 & 0.166 & \\textbf{0.067} & \\underline{0.070} & 0.071 & 0.086 & 155.6 & 79.1 & \\underline{7.6} & \\textbf{0.9} \\\\\n", - "m3-other & \\textbf{0.727} & \\underline{0.845} & 0.869 & 1.474 & \\textbf{0.027} & 0.033 & \\underline{0.032} & 0.070 & \\textbf{0.022} & \\underline{0.024} & 0.025 & 0.041 & 57.1 & 5.1 & \\underline{1.6} & \\textbf{0.8} \\\\\n", - "m3-quarterly & \\textbf{1.164} & \\underline{1.170} & 1.292 & 1.425 & \\textbf{0.053} & \\underline{0.066} & 0.068 & 0.096 & \\textbf{0.046} & \\underline{0.048} & 0.053 & 0.055 & 60.6 & 14.5 & \\underline{2.3} & \\textbf{0.9} \\\\\n", - "m3-yearly & \\textbf{2.704} & \\underline{3.062} & 3.486 & 3.172 & \\textbf{0.103} & \\underline{0.111} & 0.118 & 0.157 & \\textbf{0.080} & \\underline{0.089} & 0.101 & 0.089 & 53.4 & 9.4 & \\underline{1.9} & \\textbf{0.8} \\\\\n", - "m4-quarterly & \\textbf{1.142} & \\underline{1.218} & 1.269 & 1.602 & \\textbf{0.062} & \\underline{0.068} & 0.068 & 0.113 & \\textbf{0.050} & \\underline{0.053} & 0.055 & 0.063 & 408.4 & 2404.0 & \\underline{170.8} & \\textbf{1.5} \\\\\n", - "m4-yearly & \\textbf{3.002} & \\underline{3.559} & 3.750 & 3.966 & \\textbf{0.081} & \\underline{0.097} & 0.100 & 0.132 & \\textbf{0.067} & \\underline{0.081} & 0.086 & 0.082 & 197.2 & 1027.8 & \\underline{77.1} & \\textbf{1.3} \\\\\n", - "tourism-monthly & \\textbf{1.441} & 1.783 & 1.974 & \\underline{1.631} & \\textbf{0.072} & \\underline{0.082} & 0.084 & 0.114 & \\textbf{0.092} & 0.121 & 0.132 & \\underline{0.108} & 343.5 & 55.7 & \\underline{5.7} & \\textbf{0.9} \\\\\n", - "tourism-quarterly & \\textbf{1.506} & \\underline{1.636} & 1.800 & 1.699 & \\textbf{0.058} & \\underline{0.060} & 0.063 & 0.101 & \\textbf{0.072} & \\underline{0.081} & 0.090 & 0.083 & 69.3 & 12.6 & \\underline{2.2} & \\textbf{0.8} \\\\\n", - "tourism-yearly & \\textbf{3.277} & 3.641 & 4.175 & \\underline{3.552} & \\textbf{0.111} & \\underline{0.126} & 0.142 & 0.160 & \\textbf{0.184} & 
\\underline{0.207} & 0.245 & 0.211 & 51.1 & 6.0 & \\underline{1.7} & \\textbf{0.8} \\\\\n", + "australian-electricity-demand & \\underline{1.115} & 1.184 & \\textbf{0.882} & 1.340 & 0.057 & \\underline{0.054} & \\textbf{0.042} & 0.098 & \\underline{0.051} & 0.051 & \\textbf{0.040} & 0.059 & 2716.9 & 4.7 & \\underline{2.1} & \\textbf{0.4} \\\\\n", + "car-parts-without-missing & 1.051 & \\underline{0.807} & \\textbf{0.803} & 1.120 & 1.132 & \\underline{1.059} & \\textbf{1.022} & 2.225 & \\underline{0.897} & 0.947 & 0.957 & \\textbf{0.310} & 37.9 & 61.4 & \\underline{5.3} & \\textbf{0.9} \\\\\n", + "cif-2016 & \\textbf{0.902} & \\underline{0.986} & 1.025 & 1.289 & 0.021 & \\textbf{0.015} & \\underline{0.019} & 0.056 & \\textbf{0.057} & \\underline{0.074} & 0.076 & 0.094 & 9.8 & 3.7 & \\underline{1.5} & \\textbf{0.6} \\\\\n", + "covid-deaths & \\textbf{5.248} & \\underline{6.540} & 6.555 & 7.762 & \\textbf{0.024} & \\underline{0.050} & 0.072 & 0.116 & \\textbf{0.054} & 0.205 & 0.204 & \\underline{0.093} & 22.0 & 32.0 & \\underline{3.9} & \\textbf{0.8} \\\\\n", + "dominick & 0.848 & \\underline{0.786} & \\textbf{0.782} & 0.828 & 0.529 & \\underline{0.414} & \\textbf{0.399} & 2.210 & \\underline{0.782} & 0.809 & 0.817 & \\textbf{0.160} & 874.0 & 8661.9 & \\underline{653.7} & \\textbf{12.4} \\\\\n", + "ercot & 1.356 & \\textbf{0.578} & \\underline{0.585} & 0.761 & 0.034 & \\underline{0.017} & \\textbf{0.016} & 0.039 & 0.027 & \\textbf{0.012} & \\underline{0.012} & 0.016 & 390.5 & 3.7 & \\underline{2.0} & \\textbf{0.4} \\\\\n", + "ett-small-15min & \\textbf{0.638} & \\underline{0.714} & 0.739 & 0.768 & \\textbf{0.083} & \\underline{0.083} & 0.088 & 0.143 & \\underline{0.100} & 0.110 & 0.116 & \\textbf{0.095} & 5034.8 & 4.9 & \\underline{2.0} & \\textbf{0.5} \\\\\n", + "ett-small-1h & 0.852 & \\textbf{0.737} & \\underline{0.805} & 0.932 & 0.109 & \\textbf{0.083} & \\underline{0.085} & 0.153 & 0.117 & \\textbf{0.091} & \\underline{0.100} & 0.103 & 332.1 & 4.6 & 
\\underline{1.7} & \\textbf{0.2} \\\\\n", + "exchange-rate & \\textbf{1.407} & 1.882 & 2.118 & \\underline{1.524} & \\textbf{0.007} & 0.011 & \\underline{0.010} & 0.016 & \\textbf{0.004} & 0.006 & 0.007 & \\underline{0.005} & 9.4 & 3.6 & \\underline{1.6} & \\textbf{0.1} \\\\\n", + "fred-md & \\textbf{0.482} & 0.571 & \\underline{0.564} & 1.101 & 0.033 & \\textbf{0.029} & \\underline{0.029} & 0.082 & \\textbf{0.052} & \\underline{0.052} & 0.052 & 0.073 & 49.1 & 15.5 & \\underline{2.4} & \\textbf{0.8} \\\\\n", + "hospital & \\textbf{0.749} & \\underline{0.810} & 0.815 & 0.921 & \\textbf{0.047} & \\underline{0.059} & 0.061 & 0.083 & \\textbf{0.086} & \\underline{0.093} & 0.093 & 0.105 & 30.6 & 20.4 & \\underline{2.9} & \\textbf{0.8} \\\\\n", + "m5 & 1.638 & \\underline{1.433} & \\textbf{1.432} & 1.867 & \\textbf{0.516} & 0.535 & \\underline{0.532} & 1.457 & \\underline{0.716} & 0.818 & 0.827 & \\textbf{0.404} & 15082.5 & 8071.7 & \\underline{662.5} & \\textbf{29.3} \\\\\n", + "nn5-daily-without-missing & \\underline{0.842} & \\textbf{0.824} & 0.900 & 1.011 & 0.146 & \\textbf{0.119} & \\underline{0.136} & 0.535 & \\underline{0.105} & \\textbf{0.103} & 0.112 & 0.132 & 38.4 & 57.0 & \\underline{5.8} & \\textbf{0.8} \\\\\n", + "nn5-weekly & 0.974 & \\textbf{0.929} & \\underline{0.933} & 1.063 & \\textbf{0.077} & \\underline{0.078} & 0.079 & 0.256 & 0.060 & \\textbf{0.058} & \\underline{0.058} & 0.066 & 5.8 & 3.9 & \\underline{1.5} & \\textbf{0.8} \\\\\n", + "traffic & 1.089 & \\underline{0.644} & \\textbf{0.640} & 1.785 & 0.341 & \\textbf{0.115} & \\underline{0.120} & 0.411 & 0.161 & \\textbf{0.077} & \\underline{0.082} & 0.244 & 4173.1 & 202.3 & \\underline{18.9} & \\textbf{3.1} \\\\\n", + "weather & 0.691 & \\textbf{0.565} & \\underline{0.598} & 0.755 & 0.201 & \\textbf{0.122} & \\underline{0.128} & 0.702 & \\underline{0.314} & 0.327 & 0.334 & \\textbf{0.213} & 1108.5 & 873.4 & \\underline{92.7} & \\textbf{22.1} \\\\\n", + "m1-monthly & \\textbf{1.034} & 
\\underline{1.089} & 1.193 & 1.314 & 0.121 & \\textbf{0.106} & \\underline{0.111} & 0.193 & \\textbf{0.073} & \\underline{0.076} & 0.082 & 0.087 & 27.7 & 36.1 & \\underline{4.2} & \\textbf{0.8} \\\\\n", + "m1-quarterly & \\textbf{1.594} & \\underline{1.730} & 1.799 & 2.078 & \\textbf{0.070} & 0.089 & \\underline{0.084} & 0.143 & \\textbf{0.078} & \\underline{0.087} & 0.092 & 0.095 & 7.2 & 6.0 & \\underline{1.7} & \\textbf{0.8} \\\\\n", + "m1-yearly & \\textbf{3.574} & \\underline{4.329} & 5.160 & 4.894 & \\textbf{0.092} & \\underline{0.117} & 0.119 & 0.150 & \\textbf{0.088} & \\underline{0.107} & 0.129 & 0.112 & 5.7 & 3.8 & \\underline{1.5} & \\textbf{0.8} \\\\\n", + "m3-monthly & \\textbf{0.827} & \\underline{0.854} & 0.899 & 1.146 & \\textbf{0.078} & \\underline{0.083} & 0.084 & 0.166 & \\textbf{0.067} & \\underline{0.070} & 0.071 & 0.086 & 69.0 & 79.1 & \\underline{7.6} & \\textbf{0.9} \\\\\n", + "m3-other & \\textbf{0.727} & \\underline{0.845} & 0.869 & 1.474 & \\textbf{0.027} & 0.033 & \\underline{0.032} & 0.070 & \\textbf{0.022} & \\underline{0.024} & 0.025 & 0.041 & 7.4 & 5.1 & \\underline{1.6} & \\textbf{0.8} \\\\\n", + "m3-quarterly & \\textbf{1.164} & \\underline{1.170} & 1.292 & 1.425 & \\textbf{0.053} & \\underline{0.066} & 0.068 & 0.096 & \\textbf{0.046} & \\underline{0.048} & 0.053 & 0.055 & 12.7 & 14.5 & \\underline{2.3} & \\textbf{0.8} \\\\\n", + "m3-yearly & \\textbf{2.706} & \\underline{3.062} & 3.486 & 3.172 & \\textbf{0.103} & \\underline{0.111} & 0.118 & 0.157 & \\textbf{0.080} & \\underline{0.089} & 0.101 & 0.089 & 8.6 & 9.4 & \\underline{1.9} & \\textbf{0.8} \\\\\n", + "m4-quarterly & \\textbf{1.142} & \\underline{1.218} & 1.269 & 1.602 & \\textbf{0.062} & \\underline{0.068} & 0.068 & 0.113 & \\textbf{0.050} & \\underline{0.053} & 0.055 & 0.063 & 331.3 & 2404.0 & \\underline{170.8} & \\textbf{1.6} \\\\\n", + "m4-yearly & \\textbf{3.003} & \\underline{3.559} & 3.750 & 3.966 & \\textbf{0.081} & \\underline{0.097} & 0.100 & 0.132 & 
\\textbf{0.067} & \\underline{0.081} & 0.086 & 0.082 & 117.8 & 1027.8 & \\underline{77.1} & \\textbf{1.3} \\\\\n", + "tourism-monthly & \\textbf{1.441} & 1.783 & 1.974 & \\underline{1.631} & \\textbf{0.072} & \\underline{0.082} & 0.084 & 0.114 & \\textbf{0.092} & 0.121 & 0.132 & \\underline{0.108} & 151.1 & 55.7 & \\underline{5.7} & \\textbf{0.9} \\\\\n", + "tourism-quarterly & \\textbf{1.506} & \\underline{1.636} & 1.800 & 1.699 & \\textbf{0.058} & \\underline{0.060} & 0.063 & 0.101 & \\textbf{0.072} & \\underline{0.081} & 0.090 & 0.083 & 16.8 & 12.6 & \\underline{2.2} & \\textbf{0.8} \\\\\n", + "tourism-yearly & \\textbf{3.277} & 3.641 & 4.175 & \\underline{3.552} & \\textbf{0.111} & \\underline{0.126} & 0.142 & 0.160 & \\textbf{0.184} & \\underline{0.207} & 0.245 & 0.211 & 7.1 & 6.0 & \\underline{1.7} & \\textbf{0.8} \\\\\n", "\\end{tabular}\n", "\n" ] @@ -1226,11 +1262,18 @@ "source": [ "print(full_df.style.to_latex().replace(\"nan\", \"N/A\"))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "nx", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1244,9 +1287,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.10.13" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/experiments/amazon-chronos/results/complete-results.csv b/experiments/amazon-chronos/results/complete-results.csv new file mode 100644 index 00000000..75576d5c --- /dev/null +++ b/experiments/amazon-chronos/results/complete-results.csv @@ -0,0 +1,225 @@ +dataset,metric,model,value +m5,mase,SeasonalNaive,1.8674323634298993 +m5,mase,StatisticalEnsemble,1.638152191603833 +m5,scaled_crps,SeasonalNaive,1.4574002623684272 +m5,scaled_crps,StatisticalEnsemble,0.5163249612630462 +m5,smape,SeasonalNaive,0.40357140598445107 
+m5,smape,StatisticalEnsemble,0.7164228919710989 +m5,time,SeasonalNaive,29.344236135482788 +m5,time,StatisticalEnsemble,15082.48518037796 +dominick,mase,SeasonalNaive,0.8275396708993014 +dominick,mase,StatisticalEnsemble,0.8480031538431272 +dominick,scaled_crps,SeasonalNaive,2.210459061912306 +dominick,scaled_crps,StatisticalEnsemble,0.5288209674678412 +dominick,smape,SeasonalNaive,0.16016363526707927 +dominick,smape,StatisticalEnsemble,0.7823167704484896 +dominick,time,SeasonalNaive,12.37143611907959 +dominick,time,StatisticalEnsemble,874.0449843406677 +nn5_daily_without_missing,mase,SeasonalNaive,1.0112834698529345 +nn5_daily_without_missing,mase,StatisticalEnsemble,0.8423753193786091 +nn5_daily_without_missing,scaled_crps,SeasonalNaive,0.5351702879273849 +nn5_daily_without_missing,scaled_crps,StatisticalEnsemble,0.14553736570775094 +nn5_daily_without_missing,smape,SeasonalNaive,0.13235231518183066 +nn5_daily_without_missing,smape,StatisticalEnsemble,0.10488079515342492 +nn5_daily_without_missing,time,SeasonalNaive,0.8123540878295898 +nn5_daily_without_missing,time,StatisticalEnsemble,38.4502534866333 +nn5_weekly,mase,SeasonalNaive,1.062848269026432 +nn5_weekly,mase,StatisticalEnsemble,0.9735932153741834 +nn5_weekly,scaled_crps,SeasonalNaive,0.25582228822873815 +nn5_weekly,scaled_crps,StatisticalEnsemble,0.07720804470616706 +nn5_weekly,smape,SeasonalNaive,0.06633929067482891 +nn5_weekly,smape,StatisticalEnsemble,0.06025986107271241 +nn5_weekly,time,SeasonalNaive,0.8171298503875732 +nn5_weekly,time,StatisticalEnsemble,5.758360147476196 +traffic,mase,SeasonalNaive,1.7853234976926748 +traffic,mase,StatisticalEnsemble,1.0892706069210114 +traffic,scaled_crps,SeasonalNaive,0.41143591353383 +traffic,scaled_crps,StatisticalEnsemble,0.3414031320024622 +traffic,smape,SeasonalNaive,0.2438530051030199 +traffic,smape,StatisticalEnsemble,0.16081715430023405 +traffic,time,SeasonalNaive,3.057039260864258 +traffic,time,StatisticalEnsemble,4173.05211687088 
+weather,mase,SeasonalNaive,0.754604010525011 +weather,mase,StatisticalEnsemble,0.6914725865126918 +weather,scaled_crps,SeasonalNaive,0.7023248754552787 +weather,scaled_crps,StatisticalEnsemble,0.20146415997944236 +weather,smape,SeasonalNaive,0.21257362636712376 +weather,smape,StatisticalEnsemble,0.31372190193744925 +weather,time,SeasonalNaive,22.064918279647827 +weather,time,StatisticalEnsemble,1108.4931182861328 +australian_electricity_demand,mase,SeasonalNaive,1.3403993825206266 +australian_electricity_demand,mase,StatisticalEnsemble,1.1151674787713628 +australian_electricity_demand,scaled_crps,SeasonalNaive,0.09814662393134205 +australian_electricity_demand,scaled_crps,StatisticalEnsemble,0.05735046591017824 +australian_electricity_demand,smape,SeasonalNaive,0.058815216824657776 +australian_electricity_demand,smape,StatisticalEnsemble,0.05083634433366544 +australian_electricity_demand,time,SeasonalNaive,0.3602612018585205 +australian_electricity_demand,time,StatisticalEnsemble,2716.8693885803223 +car_parts_without_missing,mase,SeasonalNaive,1.1201381265856145 +car_parts_without_missing,mase,StatisticalEnsemble,1.050660105202646 +car_parts_without_missing,scaled_crps,SeasonalNaive,2.2252592727461242 +car_parts_without_missing,scaled_crps,StatisticalEnsemble,1.1320439488874523 +car_parts_without_missing,smape,SeasonalNaive,0.31018504435473626 +car_parts_without_missing,smape,StatisticalEnsemble,0.8966779513784662 +car_parts_without_missing,time,SeasonalNaive,0.9146387577056884 +car_parts_without_missing,time,StatisticalEnsemble,37.927063941955566 +cif_2016,mase,SeasonalNaive,1.2892906338147911 +cif_2016,mase,StatisticalEnsemble,0.9022365251221355 +cif_2016,scaled_crps,SeasonalNaive,0.0562778589590996 +cif_2016,scaled_crps,StatisticalEnsemble,0.020951705353389735 +cif_2016,smape,SeasonalNaive,0.094358553312353 +cif_2016,smape,StatisticalEnsemble,0.05749614987192558 +cif_2016,time,SeasonalNaive,0.6034519672393799 +cif_2016,time,StatisticalEnsemble,9.755268335342407 
+covid_deaths,mase,SeasonalNaive,7.7623797482537915 +covid_deaths,mase,StatisticalEnsemble,5.247590431278719 +covid_deaths,scaled_crps,SeasonalNaive,0.11604983030613081 +covid_deaths,scaled_crps,StatisticalEnsemble,0.024490948114393132 +covid_deaths,smape,SeasonalNaive,0.09313273813541141 +covid_deaths,smape,StatisticalEnsemble,0.053916857464500395 +covid_deaths,time,SeasonalNaive,0.8153104782104492 +covid_deaths,time,StatisticalEnsemble,22.0314781665802 +ercot,mase,SeasonalNaive,0.7613353868080769 +ercot,mase,StatisticalEnsemble,1.3562494003539405 +ercot,scaled_crps,SeasonalNaive,0.03946121626919911 +ercot,scaled_crps,StatisticalEnsemble,0.034150258315336315 +ercot,smape,SeasonalNaive,0.015630707775037007 +ercot,smape,StatisticalEnsemble,0.026523641327514176 +ercot,time,SeasonalNaive,0.3966584205627441 +ercot,time,StatisticalEnsemble,390.5005609989166 +ett_small_15min,mase,SeasonalNaive,0.7677330344383062 +ett_small_15min,mase,StatisticalEnsemble,0.6382274420888457 +ett_small_15min,scaled_crps,SeasonalNaive,0.1430347714870558 +ett_small_15min,scaled_crps,StatisticalEnsemble,0.08269271740545785 +ett_small_15min,smape,SeasonalNaive,0.09459814128105122 +ett_small_15min,smape,StatisticalEnsemble,0.10039187891984028 +ett_small_15min,time,SeasonalNaive,0.4571559429168701 +ett_small_15min,time,StatisticalEnsemble,5034.763110637665 +ett_small_1h,mase,SeasonalNaive,0.9316203232327729 +ett_small_1h,mase,StatisticalEnsemble,0.8519611234196047 +ett_small_1h,scaled_crps,SeasonalNaive,0.15305326839468023 +ett_small_1h,scaled_crps,StatisticalEnsemble,0.10893023778108452 +ett_small_1h,smape,SeasonalNaive,0.10259586641393585 +ett_small_1h,smape,StatisticalEnsemble,0.1171297505877785 +ett_small_1h,time,SeasonalNaive,0.2092659473419189 +ett_small_1h,time,StatisticalEnsemble,332.1302688121796 +exchange_rate,mase,SeasonalNaive,1.5238482740215735 +exchange_rate,mase,StatisticalEnsemble,1.4067630220218135 +exchange_rate,scaled_crps,SeasonalNaive,0.01577147847394285 
+exchange_rate,scaled_crps,StatisticalEnsemble,0.007234723993796653 +exchange_rate,smape,SeasonalNaive,0.004661980297476255 +exchange_rate,smape,StatisticalEnsemble,0.0044398183280252015 +exchange_rate,time,SeasonalNaive,0.0944149494171142 +exchange_rate,time,StatisticalEnsemble,9.35352873802185 +fred_md,mase,SeasonalNaive,1.1008000208784752 +fred_md,mase,StatisticalEnsemble,0.4818602144932258 +fred_md,scaled_crps,SeasonalNaive,0.08215697114580049 +fred_md,scaled_crps,StatisticalEnsemble,0.03272829800542708 +fred_md,smape,SeasonalNaive,0.0729485559677336 +fred_md,smape,StatisticalEnsemble,0.05220994480946772 +fred_md,time,SeasonalNaive,0.8469631671905518 +fred_md,time,StatisticalEnsemble,49.06454372406006 +hospital,mase,SeasonalNaive,0.9205278258071787 +hospital,mase,StatisticalEnsemble,0.7485655129982478 +hospital,scaled_crps,SeasonalNaive,0.08269508339071587 +hospital,scaled_crps,StatisticalEnsemble,0.046899209602966556 +hospital,smape,SeasonalNaive,0.10512677152235944 +hospital,smape,StatisticalEnsemble,0.0859826808296096 +hospital,time,SeasonalNaive,0.8399276733398438 +hospital,time,StatisticalEnsemble,30.61531782150269 +m1_yearly,mase,SeasonalNaive,4.894322198189649 +m1_yearly,mase,StatisticalEnsemble,3.574145484272531 +m1_yearly,scaled_crps,SeasonalNaive,0.14966361102972647 +m1_yearly,scaled_crps,StatisticalEnsemble,0.0922698712661802 +m1_yearly,smape,SeasonalNaive,0.11216119472992997 +m1_yearly,smape,StatisticalEnsemble,0.08812465232291931 +m1_yearly,time,SeasonalNaive,0.7850766181945801 +m1_yearly,time,StatisticalEnsemble,5.73260760307312 +m1_quarterly,mase,SeasonalNaive,2.0775365099320915 +m1_quarterly,mase,StatisticalEnsemble,1.5939583404585844 +m1_quarterly,scaled_crps,SeasonalNaive,0.14308704311173895 +m1_quarterly,scaled_crps,StatisticalEnsemble,0.06983964327851233 +m1_quarterly,smape,SeasonalNaive,0.0947186951123695 +m1_quarterly,smape,StatisticalEnsemble,0.07840308519164578 +m1_quarterly,time,SeasonalNaive,0.7976851463317871 
+m1_quarterly,time,StatisticalEnsemble,7.182486057281494 +m1_monthly,mase,SeasonalNaive,1.3144614942308708 +m1_monthly,mase,StatisticalEnsemble,1.0339353928765094 +m1_monthly,scaled_crps,SeasonalNaive,0.1929014674058537 +m1_monthly,scaled_crps,StatisticalEnsemble,0.12133830358487802 +m1_monthly,smape,SeasonalNaive,0.08654688593844032 +m1_monthly,smape,StatisticalEnsemble,0.0730905532066498 +m1_monthly,time,SeasonalNaive,0.8262171745300293 +m1_monthly,time,StatisticalEnsemble,27.710077047348022 +m3_yearly,mase,SeasonalNaive,3.1717102343239496 +m3_yearly,mase,StatisticalEnsemble,2.7060074099502085 +m3_yearly,scaled_crps,SeasonalNaive,0.15694102939190013 +m3_yearly,scaled_crps,StatisticalEnsemble,0.10307027528088823 +m3_yearly,smape,SeasonalNaive,0.08939945244930989 +m3_yearly,smape,StatisticalEnsemble,0.07999691218033288 +m3_yearly,time,SeasonalNaive,0.8121771812438965 +m3_yearly,time,StatisticalEnsemble,8.576204299926758 +m3_quarterly,mase,SeasonalNaive,1.425343782675886 +m3_quarterly,mase,StatisticalEnsemble,1.16376103880926 +m3_quarterly,scaled_crps,SeasonalNaive,0.09558308169170797 +m3_quarterly,scaled_crps,StatisticalEnsemble,0.05341428588839114 +m3_quarterly,smape,SeasonalNaive,0.05532565657960303 +m3_quarterly,smape,StatisticalEnsemble,0.04614976537182415 +m3_quarterly,time,SeasonalNaive,0.8202786445617676 +m3_quarterly,time,StatisticalEnsemble,12.746824741363524 +m3_monthly,mase,SeasonalNaive,1.1462045740727513 +m3_monthly,mase,StatisticalEnsemble,0.8267793470895428 +m3_monthly,scaled_crps,SeasonalNaive,0.1657581291619502 +m3_monthly,scaled_crps,StatisticalEnsemble,0.07762084692788024 +m3_monthly,smape,SeasonalNaive,0.0861955076505472 +m3_monthly,smape,StatisticalEnsemble,0.06722927141202216 +m3_monthly,time,SeasonalNaive,0.9021949768066406 +m3_monthly,time,StatisticalEnsemble,69.00676345825195 +m3_other,mase,SeasonalNaive,1.4741669727228415 +m3_other,mase,StatisticalEnsemble,0.7270561697641876 +m3_other,scaled_crps,SeasonalNaive,0.06994385450714162 
+m3_other,scaled_crps,StatisticalEnsemble,0.026865030394171055 +m3_other,smape,SeasonalNaive,0.041276500014025615 +m3_other,smape,StatisticalEnsemble,0.021632279162270698 +m3_other,time,SeasonalNaive,0.7998371124267578 +m3_other,time,StatisticalEnsemble,7.380923748016357 +tourism_yearly,mase,SeasonalNaive,3.552009714599977 +tourism_yearly,mase,StatisticalEnsemble,3.277077062185471 +tourism_yearly,scaled_crps,SeasonalNaive,0.15974723271821942 +tourism_yearly,scaled_crps,StatisticalEnsemble,0.11125194688603007 +tourism_yearly,smape,SeasonalNaive,0.21083326227292737 +tourism_yearly,smape,StatisticalEnsemble,0.18384696850918383 +tourism_yearly,time,SeasonalNaive,0.8069603443145752 +tourism_yearly,time,StatisticalEnsemble,7.067137002944946 +tourism_quarterly,mase,SeasonalNaive,1.6989892602245846 +tourism_quarterly,mase,StatisticalEnsemble,1.5058255429273564 +tourism_quarterly,scaled_crps,SeasonalNaive,0.10142968168460322 +tourism_quarterly,scaled_crps,StatisticalEnsemble,0.0576224860295668 +tourism_quarterly,smape,SeasonalNaive,0.08304859164472883 +tourism_quarterly,smape,StatisticalEnsemble,0.07237160342871483 +tourism_quarterly,time,SeasonalNaive,0.8143134117126465 +tourism_quarterly,time,StatisticalEnsemble,16.84568691253662 +tourism_monthly,mase,SeasonalNaive,1.6309399914977092 +tourism_monthly,mase,StatisticalEnsemble,1.4409102971283867 +tourism_monthly,scaled_crps,SeasonalNaive,0.11447973197962824 +tourism_monthly,scaled_crps,StatisticalEnsemble,0.07190242570828337 +tourism_monthly,smape,SeasonalNaive,0.10834946268820275 +tourism_monthly,smape,StatisticalEnsemble,0.0915893066611628 +tourism_monthly,time,SeasonalNaive,0.864677906036377 +tourism_monthly,time,StatisticalEnsemble,151.14043617248535 +m4_yearly,mase,SeasonalNaive,3.965954942439523 +m4_yearly,mase,StatisticalEnsemble,3.002568369690919 +m4_yearly,scaled_crps,SeasonalNaive,0.13235538778860942 +m4_yearly,scaled_crps,StatisticalEnsemble,0.08139201567816129 +m4_yearly,smape,SeasonalNaive,0.0817671723808375 
+m4_yearly,smape,StatisticalEnsemble,0.06748203578862982 +m4_yearly,time,SeasonalNaive,1.3356640338897705 +m4_yearly,time,StatisticalEnsemble,117.77939891815186 +m4_quarterly,mase,SeasonalNaive,1.6022471757947163 +m4_quarterly,mase,StatisticalEnsemble,1.1424438068562632 +m4_quarterly,scaled_crps,SeasonalNaive,0.11284029920261848 +m4_quarterly,scaled_crps,StatisticalEnsemble,0.061619570590469346 +m4_quarterly,smape,SeasonalNaive,0.06260683416118425 +m4_quarterly,smape,StatisticalEnsemble,0.049689605512879355 +m4_quarterly,time,SeasonalNaive,1.6325972080230713 +m4_quarterly,time,StatisticalEnsemble,331.26417803764343 diff --git a/experiments/amazon-chronos/src/eval_utils/statsforecast_pipeline.py b/experiments/amazon-chronos/src/eval_utils/statsforecast_pipeline.py index a7a2f7e3..0bedbbcf 100644 --- a/experiments/amazon-chronos/src/eval_utils/statsforecast_pipeline.py +++ b/experiments/amazon-chronos/src/eval_utils/statsforecast_pipeline.py @@ -72,7 +72,7 @@ def run_statistical_ensemble( freq: str, seasonality: int, quantiles: List[float], - max_context_length: int = 5_000, + max_context_length: int = 2_000, ) -> Tuple[pd.DataFrame, float, str]: os.environ["NIXTLA_ID_AS_COL"] = "true" models = [ @@ -82,14 +82,11 @@ def run_statistical_ensemble( DynamicOptimizedTheta(season_length=seasonality), ] init_time = time() - series_per_core = 15 - n_series = train_df["unique_id"].nunique() - n_jobs = max(1, min(n_series // series_per_core, os.cpu_count())) sf = StatsForecast( models=models, fallback_model=SeasonalNaive(season_length=seasonality), freq=freq, - n_jobs=n_jobs, + n_jobs=-1, ) # Shorten all time series to at most max_context_length to avoid extremely long runtime train_df = train_df.groupby("unique_id", sort=False, as_index=False).tail( diff --git a/experiments/amazon-chronos/src/eval_utils/utils.py b/experiments/amazon-chronos/src/eval_utils/utils.py index 8c9c4127..712d395a 100644 --- a/experiments/amazon-chronos/src/eval_utils/utils.py +++ 
b/experiments/amazon-chronos/src/eval_utils/utils.py @@ -195,8 +195,10 @@ def evaluate_models(self, models: List[str]) -> pd.DataFrame: fcsts_df = pd.concat(fcsts_df, axis=1).reset_index() fcsts_df["ds"] = pd.to_datetime(fcsts_df["ds"]) times_df = pd.concat(times_df) - return evaluate_from_predictions( - models=model, fcsts_df=fcsts_df, times_df=times_df + return self.evaluate_from_predictions( + models=model, + fcsts_df=fcsts_df, + times_df=times_df, ) def evaluate_from_predictions(