diff --git a/experiments/amazon-chronos/README.md b/experiments/amazon-chronos/README.md
index 4e2a2b9d..3e292051 100644
--- a/experiments/amazon-chronos/README.md
+++ b/experiments/amazon-chronos/README.md
@@ -1,6 +1,10 @@
# Extended comparison of Chronos against the statistical ensemble
-We present an extension to the [original comparison by Nixtla](https://github.com/Nixtla/nixtla/tree/main/experiments/amazon-chronos) of Chronos [1] against the SCUM ensemble [2]. In this analysis on over 200K unique time series across 28 datasets from Benchmark II in the Chronos paper [1], we show that **zero-shot** Chronos models perform comparably to this strong ensemble of 4 statistical models while being significantly faster on average. We follow the original study as closely as possible, including loading task definitions from GluonTS and computing metrics using utilsforecast.
+
+## Background
+A few weeks ago, we presented a [fully reproducible experiment](https://github.com/Nixtla/nixtla/tree/main/experiments/amazon-chronos) showing that Amazon Chronos was 10% less accurate and 500% slower than training classical statistical models. The Amazon team kindly responded by extending our benchmark, confirming our results on the originally selected datasets and reporting different performance on additional datasets.
+
+Here we present an extension to the [original comparison by Nixtla](https://github.com/Nixtla/nixtla/tree/main/experiments/amazon-chronos) of Chronos [1] against the SCUM ensemble [2]. In this analysis on over 200K unique time series across 28 datasets from Benchmark II in the Chronos paper [1], we show that **zero-shot** Chronos models perform comparably to this strong ensemble of 4 statistical models while being significantly faster on average. We follow the original study as closely as possible, including loading task definitions from GluonTS and computing metrics using utilsforecast.
## Empirical Evaluation
diff --git a/experiments/amazon-chronos/collect_results.ipynb b/experiments/amazon-chronos/collect_results.ipynb
index d7c2f2c1..e2eda55d 100644
--- a/experiments/amazon-chronos/collect_results.ipynb
+++ b/experiments/amazon-chronos/collect_results.ipynb
@@ -2,12 +2,11 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd\n",
- "from metaflow import Flow, Run, Step"
+ "import pandas as pd"
]
},
{
@@ -19,7 +18,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -48,26 +47,29 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
- "save_to_disk = False"
+ "save_to_disk = False\n",
+ "collect_results = False"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Make sure to replace these with your run IDs!\n",
- "results_all = pd.concat([\n",
- " get_results(1712079941097970), # SeasonalNaive\n",
- " get_results(1712079795572065), # StatisticalEnsemble\n",
- " get_results(1712080010851589), # chronos_mini\n",
- " get_results(1712081461874960), # chronos_large\n",
- "])\n",
+ "if collect_results:\n",
+ " from metaflow import Flow, Run, Step\n",
+ " results_all = pd.concat([\n",
+ " get_results(1712079941097970), # SeasonalNaive\n",
+ " get_results(1712079795572065), # StatisticalEnsemble\n",
+ " get_results(1712080010851589), # chronos_mini\n",
+ " get_results(1712081461874960), # chronos_large\n",
+ " ])\n",
"if save_to_disk:\n",
" results_all.to_csv(\"results/results_all.csv\", index=False)"
]
@@ -81,14 +83,15 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
- "results_all = []\n",
- "for run in list(Flow(\"ForecastEvaluation\").runs())[:5]:\n",
- " results_all.append(get_results(run.id))\n",
- "results_all = pd.concat(results_all).dropna(subset=\"value\").drop_duplicates([\"dataset\", \"model\", \"metric\"])\n",
+ "if collect_results:\n",
+ " results_all = []\n",
+ " for run in list(Flow(\"ForecastEvaluation\").runs())[:5]:\n",
+ " results_all.append(get_results(run.id))\n",
+ " results_all = pd.concat(results_all).dropna(subset=\"value\").drop_duplicates([\"dataset\", \"model\", \"metric\"])\n",
"if save_to_disk:\n",
" results_all.to_csv(\"results/results_all.csv\", index=False)"
]
@@ -102,11 +105,31 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
- "results_all = pd.read_csv(\"results/results_all.csv\")"
+ "results_all = pd.read_csv(\"results/results_all.csv\")\n",
+ "results_all = results_all.query(\"model.str.startswith('amazon')\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Patch the results with a modified context length for the Statistical Ensemble"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "results_all = pd.concat([\n",
+ " results_all,\n",
+ " pd.read_csv(\"results/complete-results.csv\"),\n",
+ "])"
]
},
{
@@ -118,7 +141,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
@@ -127,7 +150,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 46,
"metadata": {},
"outputs": [
{
@@ -203,26 +226,26 @@
"
\n",
" australian_electricity_demand | \n",
" 1.340 | \n",
- " 1.215 | \n",
+ " 1.115 | \n",
" 1.184 | \n",
" 0.882 | \n",
" 0.098 | \n",
- " 0.058 | \n",
+ " 0.057 | \n",
" 0.054 | \n",
" 0.042 | \n",
" 0.059 | \n",
- " 0.055 | \n",
+ " 0.051 | \n",
" 0.051 | \n",
" 0.040 | \n",
- " 0.333 | \n",
- " 12602.872 | \n",
+ " 0.360 | \n",
+ " 2716.869 | \n",
" 4.703 | \n",
" 2.116 | \n",
"
\n",
" \n",
" car_parts_without_missing | \n",
" 1.120 | \n",
- " 1.050 | \n",
+ " 1.051 | \n",
" 0.807 | \n",
" 0.803 | \n",
" 2.225 | \n",
@@ -233,15 +256,15 @@
" 0.897 | \n",
" 0.947 | \n",
" 0.957 | \n",
- " 0.944 | \n",
- " 110.575 | \n",
+ " 0.915 | \n",
+ " 37.927 | \n",
" 61.375 | \n",
" 5.296 | \n",
"
\n",
" \n",
" cif_2016 | \n",
" 1.289 | \n",
- " 0.918 | \n",
+ " 0.902 | \n",
" 0.986 | \n",
" 1.025 | \n",
" 0.056 | \n",
@@ -249,30 +272,30 @@
" 0.015 | \n",
" 0.019 | \n",
" 0.094 | \n",
- " 0.058 | \n",
+ " 0.057 | \n",
" 0.074 | \n",
" 0.076 | \n",
- " 0.672 | \n",
- " 83.170 | \n",
+ " 0.603 | \n",
+ " 9.755 | \n",
" 3.744 | \n",
" 1.532 | \n",
"
\n",
" \n",
" covid_deaths | \n",
" 7.762 | \n",
- " 5.246 | \n",
+ " 5.248 | \n",
" 6.540 | \n",
" 6.555 | \n",
" 0.116 | \n",
- " 0.025 | \n",
+ " 0.024 | \n",
" 0.050 | \n",
" 0.072 | \n",
" 0.093 | \n",
" 0.054 | \n",
" 0.205 | \n",
" 0.204 | \n",
- " 0.883 | \n",
- " 86.333 | \n",
+ " 0.815 | \n",
+ " 22.031 | \n",
" 32.049 | \n",
" 3.871 | \n",
"
\n",
@@ -290,72 +313,72 @@
" 0.782 | \n",
" 0.809 | \n",
" 0.817 | \n",
- " 11.452 | \n",
- " 981.563 | \n",
+ " 12.371 | \n",
+ " 874.045 | \n",
" 8661.922 | \n",
" 653.726 | \n",
" \n",
" \n",
" ercot | \n",
" 0.761 | \n",
- " 1.979 | \n",
+ " 1.356 | \n",
" 0.578 | \n",
" 0.585 | \n",
" 0.039 | \n",
- " 0.050 | \n",
+ " 0.034 | \n",
" 0.017 | \n",
" 0.016 | \n",
" 0.016 | \n",
- " 0.040 | \n",
+ " 0.027 | \n",
" 0.012 | \n",
" 0.012 | \n",
- " 0.375 | \n",
- " 5119.783 | \n",
+ " 0.397 | \n",
+ " 390.501 | \n",
" 3.666 | \n",
" 1.979 | \n",
"
\n",
" \n",
" ett_small_15min | \n",
" 0.768 | \n",
- " NaN | \n",
+ " 0.638 | \n",
" 0.714 | \n",
" 0.739 | \n",
" 0.143 | \n",
- " NaN | \n",
+ " 0.083 | \n",
" 0.083 | \n",
" 0.088 | \n",
" 0.095 | \n",
- " NaN | \n",
+ " 0.100 | \n",
" 0.110 | \n",
" 0.116 | \n",
- " 0.422 | \n",
- " NaN | \n",
+ " 0.457 | \n",
+ " 5034.763 | \n",
" 4.933 | \n",
" 2.050 | \n",
"
\n",
" \n",
" ett_small_1h | \n",
" 0.932 | \n",
- " 1.003 | \n",
+ " 0.852 | \n",
" 0.737 | \n",
" 0.805 | \n",
" 0.153 | \n",
- " 0.123 | \n",
+ " 0.109 | \n",
" 0.083 | \n",
" 0.085 | \n",
" 0.103 | \n",
- " 0.131 | \n",
+ " 0.117 | \n",
" 0.091 | \n",
" 0.100 | \n",
- " 0.183 | \n",
- " 4206.250 | \n",
+ " 0.209 | \n",
+ " 332.130 | \n",
" 4.607 | \n",
" 1.688 | \n",
"
\n",
" \n",
" exchange_rate | \n",
" 1.524 | \n",
- " 1.429 | \n",
+ " 1.407 | \n",
" 1.882 | \n",
" 2.118 | \n",
" 0.016 | \n",
@@ -366,34 +389,34 @@
" 0.004 | \n",
" 0.006 | \n",
" 0.007 | \n",
- " 0.075 | \n",
- " 259.485 | \n",
+ " 0.094 | \n",
+ " 9.354 | \n",
" 3.613 | \n",
" 1.554 | \n",
"
\n",
" \n",
" fred_md | \n",
" 1.101 | \n",
- " 0.489 | \n",
+ " 0.482 | \n",
" 0.571 | \n",
" 0.564 | \n",
" 0.082 | \n",
- " 0.035 | \n",
+ " 0.033 | \n",
" 0.029 | \n",
" 0.029 | \n",
" 0.073 | \n",
" 0.052 | \n",
" 0.052 | \n",
" 0.052 | \n",
- " 0.872 | \n",
- " 235.237 | \n",
+ " 0.847 | \n",
+ " 49.065 | \n",
" 15.487 | \n",
" 2.359 | \n",
"
\n",
" \n",
" hospital | \n",
" 0.921 | \n",
- " 0.748 | \n",
+ " 0.749 | \n",
" 0.810 | \n",
" 0.815 | \n",
" 0.083 | \n",
@@ -404,8 +427,8 @@
" 0.086 | \n",
" 0.093 | \n",
" 0.093 | \n",
- " 0.880 | \n",
- " 78.938 | \n",
+ " 0.840 | \n",
+ " 30.615 | \n",
" 20.415 | \n",
" 2.852 | \n",
"
\n",
@@ -423,15 +446,15 @@
" 0.716 | \n",
" 0.818 | \n",
" 0.827 | \n",
- " 27.139 | \n",
- " 15599.160 | \n",
+ " 29.344 | \n",
+ " 15082.485 | \n",
" 8071.688 | \n",
" 662.490 | \n",
" \n",
" \n",
" nn5_daily_without_missing | \n",
" 1.011 | \n",
- " 0.844 | \n",
+ " 0.842 | \n",
" 0.824 | \n",
" 0.900 | \n",
" 0.535 | \n",
@@ -442,8 +465,8 @@
" 0.105 | \n",
" 0.103 | \n",
" 0.112 | \n",
- " 0.882 | \n",
- " 235.894 | \n",
+ " 0.812 | \n",
+ " 38.450 | \n",
" 57.016 | \n",
" 5.834 | \n",
"
\n",
@@ -461,46 +484,46 @@
" 0.060 | \n",
" 0.058 | \n",
" 0.058 | \n",
- " 0.812 | \n",
- " 51.829 | \n",
+ " 0.817 | \n",
+ " 5.758 | \n",
" 3.854 | \n",
" 1.515 | \n",
" \n",
" \n",
" traffic | \n",
" 1.785 | \n",
- " 1.163 | \n",
+ " 1.089 | \n",
" 0.644 | \n",
" 0.640 | \n",
" 0.411 | \n",
- " 0.353 | \n",
+ " 0.341 | \n",
" 0.115 | \n",
" 0.120 | \n",
" 0.244 | \n",
- " 0.179 | \n",
+ " 0.161 | \n",
" 0.077 | \n",
" 0.082 | \n",
- " 2.627 | \n",
- " 12285.943 | \n",
+ " 3.057 | \n",
+ " 4173.052 | \n",
" 202.267 | \n",
" 18.898 | \n",
"
\n",
" \n",
" weather | \n",
" 0.755 | \n",
- " 0.675 | \n",
+ " 0.691 | \n",
" 0.565 | \n",
" 0.598 | \n",
" 0.702 | \n",
- " 0.207 | \n",
+ " 0.201 | \n",
" 0.122 | \n",
" 0.128 | \n",
" 0.213 | \n",
- " 0.312 | \n",
+ " 0.314 | \n",
" 0.327 | \n",
" 0.334 | \n",
- " 19.922 | \n",
- " 4159.202 | \n",
+ " 22.065 | \n",
+ " 1108.493 | \n",
" 873.384 | \n",
" 92.715 | \n",
"
\n",
@@ -518,8 +541,8 @@
" 0.073 | \n",
" 0.076 | \n",
" 0.082 | \n",
- " 0.842 | \n",
- " 84.964 | \n",
+ " 0.826 | \n",
+ " 27.710 | \n",
" 36.095 | \n",
" 4.165 | \n",
" \n",
@@ -537,8 +560,8 @@
" 0.078 | \n",
" 0.087 | \n",
" 0.092 | \n",
- " 0.836 | \n",
- " 56.230 | \n",
+ " 0.798 | \n",
+ " 7.182 | \n",
" 6.025 | \n",
" 1.691 | \n",
" \n",
@@ -556,15 +579,15 @@
" 0.088 | \n",
" 0.107 | \n",
" 0.129 | \n",
- " 0.854 | \n",
- " 49.356 | \n",
+ " 0.785 | \n",
+ " 5.733 | \n",
" 3.797 | \n",
" 1.471 | \n",
" \n",
" \n",
" m3_monthly | \n",
" 1.146 | \n",
- " 0.826 | \n",
+ " 0.827 | \n",
" 0.854 | \n",
" 0.899 | \n",
" 0.166 | \n",
@@ -575,8 +598,8 @@
" 0.067 | \n",
" 0.070 | \n",
" 0.071 | \n",
- " 0.904 | \n",
- " 155.557 | \n",
+ " 0.902 | \n",
+ " 69.007 | \n",
" 79.104 | \n",
" 7.552 | \n",
"
\n",
@@ -594,8 +617,8 @@
" 0.022 | \n",
" 0.024 | \n",
" 0.025 | \n",
- " 0.809 | \n",
- " 57.059 | \n",
+ " 0.800 | \n",
+ " 7.381 | \n",
" 5.051 | \n",
" 1.609 | \n",
" \n",
@@ -613,15 +636,15 @@
" 0.046 | \n",
" 0.048 | \n",
" 0.053 | \n",
- " 0.877 | \n",
- " 60.618 | \n",
+ " 0.820 | \n",
+ " 12.747 | \n",
" 14.497 | \n",
" 2.305 | \n",
" \n",
" \n",
" m3_yearly | \n",
" 3.172 | \n",
- " 2.704 | \n",
+ " 2.706 | \n",
" 3.062 | \n",
" 3.486 | \n",
" 0.157 | \n",
@@ -632,8 +655,8 @@
" 0.080 | \n",
" 0.089 | \n",
" 0.101 | \n",
- " 0.838 | \n",
- " 53.385 | \n",
+ " 0.812 | \n",
+ " 8.576 | \n",
" 9.448 | \n",
" 1.886 | \n",
"
\n",
@@ -651,15 +674,15 @@
" 0.050 | \n",
" 0.053 | \n",
" 0.055 | \n",
- " 1.491 | \n",
- " 408.369 | \n",
+ " 1.633 | \n",
+ " 331.264 | \n",
" 2403.956 | \n",
" 170.778 | \n",
" \n",
" \n",
" m4_yearly | \n",
" 3.966 | \n",
- " 3.002 | \n",
+ " 3.003 | \n",
" 3.559 | \n",
" 3.750 | \n",
" 0.132 | \n",
@@ -670,8 +693,8 @@
" 0.067 | \n",
" 0.081 | \n",
" 0.086 | \n",
- " 1.280 | \n",
- " 197.224 | \n",
+ " 1.336 | \n",
+ " 117.779 | \n",
" 1027.831 | \n",
" 77.065 | \n",
"
\n",
@@ -689,8 +712,8 @@
" 0.092 | \n",
" 0.121 | \n",
" 0.132 | \n",
- " 0.866 | \n",
- " 343.512 | \n",
+ " 0.865 | \n",
+ " 151.140 | \n",
" 55.717 | \n",
" 5.705 | \n",
" \n",
@@ -708,8 +731,8 @@
" 0.072 | \n",
" 0.081 | \n",
" 0.090 | \n",
- " 0.843 | \n",
- " 69.271 | \n",
+ " 0.814 | \n",
+ " 16.846 | \n",
" 12.613 | \n",
" 2.191 | \n",
" \n",
@@ -727,8 +750,8 @@
" 0.184 | \n",
" 0.207 | \n",
" 0.245 | \n",
- " 0.842 | \n",
- " 51.096 | \n",
+ " 0.807 | \n",
+ " 7.067 | \n",
" 5.982 | \n",
" 1.653 | \n",
" \n",
@@ -740,31 +763,31 @@
"metric mase \\\n",
"model SeasonalNaive StatisticalEnsemble \n",
"dataset \n",
- "australian_electricity_demand 1.340 1.215 \n",
- "car_parts_without_missing 1.120 1.050 \n",
- "cif_2016 1.289 0.918 \n",
- "covid_deaths 7.762 5.246 \n",
+ "australian_electricity_demand 1.340 1.115 \n",
+ "car_parts_without_missing 1.120 1.051 \n",
+ "cif_2016 1.289 0.902 \n",
+ "covid_deaths 7.762 5.248 \n",
"dominick 0.828 0.848 \n",
- "ercot 0.761 1.979 \n",
- "ett_small_15min 0.768 NaN \n",
- "ett_small_1h 0.932 1.003 \n",
- "exchange_rate 1.524 1.429 \n",
- "fred_md 1.101 0.489 \n",
- "hospital 0.921 0.748 \n",
+ "ercot 0.761 1.356 \n",
+ "ett_small_15min 0.768 0.638 \n",
+ "ett_small_1h 0.932 0.852 \n",
+ "exchange_rate 1.524 1.407 \n",
+ "fred_md 1.101 0.482 \n",
+ "hospital 0.921 0.749 \n",
"m5 1.867 1.638 \n",
- "nn5_daily_without_missing 1.011 0.844 \n",
+ "nn5_daily_without_missing 1.011 0.842 \n",
"nn5_weekly 1.063 0.974 \n",
- "traffic 1.785 1.163 \n",
- "weather 0.755 0.675 \n",
+ "traffic 1.785 1.089 \n",
+ "weather 0.755 0.691 \n",
"m1_monthly 1.314 1.034 \n",
"m1_quarterly 2.078 1.594 \n",
"m1_yearly 4.894 3.574 \n",
- "m3_monthly 1.146 0.826 \n",
+ "m3_monthly 1.146 0.827 \n",
"m3_other 1.474 0.727 \n",
"m3_quarterly 1.425 1.164 \n",
- "m3_yearly 3.172 2.704 \n",
+ "m3_yearly 3.172 2.706 \n",
"m4_quarterly 1.602 1.142 \n",
- "m4_yearly 3.966 3.002 \n",
+ "m4_yearly 3.966 3.003 \n",
"tourism_monthly 1.631 1.441 \n",
"tourism_quarterly 1.699 1.506 \n",
"tourism_yearly 3.552 3.277 \n",
@@ -804,22 +827,22 @@
"metric scaled_crps \\\n",
"model SeasonalNaive StatisticalEnsemble \n",
"dataset \n",
- "australian_electricity_demand 0.098 0.058 \n",
+ "australian_electricity_demand 0.098 0.057 \n",
"car_parts_without_missing 2.225 1.132 \n",
"cif_2016 0.056 0.021 \n",
- "covid_deaths 0.116 0.025 \n",
+ "covid_deaths 0.116 0.024 \n",
"dominick 2.210 0.529 \n",
- "ercot 0.039 0.050 \n",
- "ett_small_15min 0.143 NaN \n",
- "ett_small_1h 0.153 0.123 \n",
+ "ercot 0.039 0.034 \n",
+ "ett_small_15min 0.143 0.083 \n",
+ "ett_small_1h 0.153 0.109 \n",
"exchange_rate 0.016 0.007 \n",
- "fred_md 0.082 0.035 \n",
+ "fred_md 0.082 0.033 \n",
"hospital 0.083 0.047 \n",
"m5 1.457 0.516 \n",
"nn5_daily_without_missing 0.535 0.146 \n",
"nn5_weekly 0.256 0.077 \n",
- "traffic 0.411 0.353 \n",
- "weather 0.702 0.207 \n",
+ "traffic 0.411 0.341 \n",
+ "weather 0.702 0.201 \n",
"m1_monthly 0.193 0.121 \n",
"m1_quarterly 0.143 0.070 \n",
"m1_yearly 0.150 0.092 \n",
@@ -868,22 +891,22 @@
"metric smape \\\n",
"model SeasonalNaive StatisticalEnsemble \n",
"dataset \n",
- "australian_electricity_demand 0.059 0.055 \n",
+ "australian_electricity_demand 0.059 0.051 \n",
"car_parts_without_missing 0.310 0.897 \n",
- "cif_2016 0.094 0.058 \n",
+ "cif_2016 0.094 0.057 \n",
"covid_deaths 0.093 0.054 \n",
"dominick 0.160 0.782 \n",
- "ercot 0.016 0.040 \n",
- "ett_small_15min 0.095 NaN \n",
- "ett_small_1h 0.103 0.131 \n",
+ "ercot 0.016 0.027 \n",
+ "ett_small_15min 0.095 0.100 \n",
+ "ett_small_1h 0.103 0.117 \n",
"exchange_rate 0.005 0.004 \n",
"fred_md 0.073 0.052 \n",
"hospital 0.105 0.086 \n",
"m5 0.404 0.716 \n",
"nn5_daily_without_missing 0.132 0.105 \n",
"nn5_weekly 0.066 0.060 \n",
- "traffic 0.244 0.179 \n",
- "weather 0.213 0.312 \n",
+ "traffic 0.244 0.161 \n",
+ "weather 0.213 0.314 \n",
"m1_monthly 0.087 0.073 \n",
"m1_quarterly 0.095 0.078 \n",
"m1_yearly 0.112 0.088 \n",
@@ -932,34 +955,34 @@
"metric time \\\n",
"model SeasonalNaive StatisticalEnsemble \n",
"dataset \n",
- "australian_electricity_demand 0.333 12602.872 \n",
- "car_parts_without_missing 0.944 110.575 \n",
- "cif_2016 0.672 83.170 \n",
- "covid_deaths 0.883 86.333 \n",
- "dominick 11.452 981.563 \n",
- "ercot 0.375 5119.783 \n",
- "ett_small_15min 0.422 NaN \n",
- "ett_small_1h 0.183 4206.250 \n",
- "exchange_rate 0.075 259.485 \n",
- "fred_md 0.872 235.237 \n",
- "hospital 0.880 78.938 \n",
- "m5 27.139 15599.160 \n",
- "nn5_daily_without_missing 0.882 235.894 \n",
- "nn5_weekly 0.812 51.829 \n",
- "traffic 2.627 12285.943 \n",
- "weather 19.922 4159.202 \n",
- "m1_monthly 0.842 84.964 \n",
- "m1_quarterly 0.836 56.230 \n",
- "m1_yearly 0.854 49.356 \n",
- "m3_monthly 0.904 155.557 \n",
- "m3_other 0.809 57.059 \n",
- "m3_quarterly 0.877 60.618 \n",
- "m3_yearly 0.838 53.385 \n",
- "m4_quarterly 1.491 408.369 \n",
- "m4_yearly 1.280 197.224 \n",
- "tourism_monthly 0.866 343.512 \n",
- "tourism_quarterly 0.843 69.271 \n",
- "tourism_yearly 0.842 51.096 \n",
+ "australian_electricity_demand 0.360 2716.869 \n",
+ "car_parts_without_missing 0.915 37.927 \n",
+ "cif_2016 0.603 9.755 \n",
+ "covid_deaths 0.815 22.031 \n",
+ "dominick 12.371 874.045 \n",
+ "ercot 0.397 390.501 \n",
+ "ett_small_15min 0.457 5034.763 \n",
+ "ett_small_1h 0.209 332.130 \n",
+ "exchange_rate 0.094 9.354 \n",
+ "fred_md 0.847 49.065 \n",
+ "hospital 0.840 30.615 \n",
+ "m5 29.344 15082.485 \n",
+ "nn5_daily_without_missing 0.812 38.450 \n",
+ "nn5_weekly 0.817 5.758 \n",
+ "traffic 3.057 4173.052 \n",
+ "weather 22.065 1108.493 \n",
+ "m1_monthly 0.826 27.710 \n",
+ "m1_quarterly 0.798 7.182 \n",
+ "m1_yearly 0.785 5.733 \n",
+ "m3_monthly 0.902 69.007 \n",
+ "m3_other 0.800 7.381 \n",
+ "m3_quarterly 0.820 12.747 \n",
+ "m3_yearly 0.812 8.576 \n",
+ "m4_quarterly 1.633 331.264 \n",
+ "m4_yearly 1.336 117.779 \n",
+ "tourism_monthly 0.865 151.140 \n",
+ "tourism_quarterly 0.814 16.846 \n",
+ "tourism_yearly 0.807 7.067 \n",
"\n",
"metric \n",
"model amazon/chronos-t5-large amazon/chronos-t5-mini \n",
@@ -994,13 +1017,27 @@
"tourism_yearly 5.982 1.653 "
]
},
- "execution_count": 8,
+ "execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
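- "# StatisticalEnsemble takes >24 hours to forecast on `ett_small_15min`, so the result is missing for this dataset.\n",
+ "# In the original run StatisticalEnsemble took >24 hours on `ett_small_15min`, so its result was missing; the patched results from results/complete-results.csv now include this dataset.\n",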
+ "original_datasets = [\n",
+ " \"m1_monthly\",\n",
+ " \"m1_quarterly\", \n",
+ " \"m1_yearly\", \n",
+ " \"m3_monthly\", \n",
+ " \"m3_other\", \n",
+ " \"m3_quarterly\",\n",
+ " \"m3_yearly\", \n",
+ " \"m4_quarterly\", \n",
+ " \"m4_yearly\", \n",
+ " \"tourism_monthly\",\n",
+ " \"tourism_quarterly\", \n",
+ " \"tourism_yearly\", \n",
+ "]\n",
"dataset_order = [\n",
" \"australian_electricity_demand\", \n",
" \"car_parts_without_missing\",\n",
@@ -1018,18 +1055,7 @@
" \"nn5_weekly\", \n",
" \"traffic\", \n",
" \"weather\",\n",
- " \"m1_monthly\",\n",
- " \"m1_quarterly\", \n",
- " \"m1_yearly\", \n",
- " \"m3_monthly\", \n",
- " \"m3_other\", \n",
- " \"m3_quarterly\",\n",
- " \"m3_yearly\", \n",
- " \"m4_quarterly\", \n",
- " \"m4_yearly\", \n",
- " \"tourism_monthly\",\n",
- " \"tourism_quarterly\", \n",
- " \"tourism_yearly\", \n",
+ " *original_datasets\n",
"]\n",
"table = table.reindex(dataset_order)\n",
"table"
@@ -1046,7 +1072,24 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from scipy.stats import gmean\n",
+ "\n",
+ "def calculate_gmean(table: pd.DataFrame):\n",
+ " results = []\n",
+ " for metric in [\"mase\", \"scaled_crps\", \"smape\"]:\n",
+ " scaled = table[metric].divide(table[metric][\"SeasonalNaive\"], axis=0).fillna(1.0)\n",
+ " agg = pd.concat({metric: scaled.apply(gmean)})\n",
+ " results.append(agg)\n",
+ " return pd.concat(results).round(3).to_frame().T"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
"metadata": {},
"outputs": [
{
@@ -1094,15 +1137,15 @@
" \n",
" 0 | \n",
" 1.0 | \n",
- " 0.836 | \n",
+ " 0.809 | \n",
" 0.81 | \n",
" 0.845 | \n",
" 1.0 | \n",
- " 0.505 | \n",
+ " 0.483 | \n",
" 0.472 | \n",
" 0.485 | \n",
" 1.0 | \n",
- " 0.987 | \n",
+ " 0.964 | \n",
" 1.034 | \n",
" 1.085 | \n",
"
\n",
@@ -1113,11 +1156,11 @@
"text/plain": [
" mase \\\n",
"model SeasonalNaive StatisticalEnsemble amazon/chronos-t5-large \n",
- "0 1.0 0.836 0.81 \n",
+ "0 1.0 0.809 0.81 \n",
"\n",
" scaled_crps \\\n",
"model amazon/chronos-t5-mini SeasonalNaive StatisticalEnsemble \n",
- "0 0.845 1.0 0.505 \n",
+ "0 0.845 1.0 0.483 \n",
"\n",
" smape \\\n",
"model amazon/chronos-t5-large amazon/chronos-t5-mini SeasonalNaive \n",
@@ -1125,23 +1168,16 @@
"\n",
" \n",
"model StatisticalEnsemble amazon/chronos-t5-large amazon/chronos-t5-mini \n",
- "0 0.987 1.034 1.085 "
+ "0 0.964 1.034 1.085 "
]
},
- "execution_count": 9,
+ "execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "from scipy.stats import gmean\n",
- "\n",
- "results = []\n",
- "for metric in [\"mase\", \"scaled_crps\", \"smape\"]:\n",
- " scaled = table[metric].divide(table[metric][\"SeasonalNaive\"], axis=0).fillna(1.0)\n",
- " agg = pd.concat({metric: scaled.apply(gmean)})\n",
- " results.append(agg)\n",
- "pd.concat(results).round(3).to_frame().T"
+ "calculate_gmean(table)"
]
},
{
@@ -1153,7 +1189,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
@@ -1180,7 +1216,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 52,
"metadata": {},
"outputs": [
{
@@ -1190,34 +1226,34 @@
"\\begin{tabular}{lllllllllllllllll}\n",
" & \\multicolumn{4}{r}{mase} & \\multicolumn{4}{r}{scaled_crps} & \\multicolumn{4}{r}{smape} & \\multicolumn{4}{r}{time} \\\\\n",
"model & StatisticalEnsemble & Chronos (large) & Chronos (mini) & SeasonalNaive & StatisticalEnsemble & Chronos (large) & Chronos (mini) & SeasonalNaive & StatisticalEnsemble & Chronos (large) & Chronos (mini) & SeasonalNaive & StatisticalEnsemble & Chronos (large) & Chronos (mini) & SeasonalNaive \\\\\n",
- "australian-electricity-demand & 1.215 & \\underline{1.184} & \\textbf{0.882} & 1.340 & 0.058 & \\underline{0.054} & \\textbf{0.042} & 0.098 & 0.055 & \\underline{0.051} & \\textbf{0.040} & 0.059 & 12602.9 & 4.7 & \\underline{2.1} & \\textbf{0.3} \\\\\n",
- "car-parts-without-missing & 1.050 & \\underline{0.807} & \\textbf{0.803} & 1.120 & 1.132 & \\underline{1.059} & \\textbf{1.022} & 2.225 & \\underline{0.897} & 0.947 & 0.957 & \\textbf{0.310} & 110.6 & 61.4 & \\underline{5.3} & \\textbf{0.9} \\\\\n",
- "cif-2016 & \\textbf{0.918} & \\underline{0.986} & 1.025 & 1.289 & 0.021 & \\textbf{0.015} & \\underline{0.019} & 0.056 & \\textbf{0.058} & \\underline{0.074} & 0.076 & 0.094 & 83.2 & 3.7 & \\underline{1.5} & \\textbf{0.7} \\\\\n",
- "covid-deaths & \\textbf{5.246} & \\underline{6.540} & 6.555 & 7.762 & \\textbf{0.025} & \\underline{0.050} & 0.072 & 0.116 & \\textbf{0.054} & 0.205 & 0.204 & \\underline{0.093} & 86.3 & 32.0 & \\underline{3.9} & \\textbf{0.9} \\\\\n",
- "dominick & 0.848 & \\underline{0.786} & \\textbf{0.782} & 0.828 & 0.529 & \\underline{0.414} & \\textbf{0.399} & 2.210 & \\underline{0.782} & 0.809 & 0.817 & \\textbf{0.160} & 981.6 & 8661.9 & \\underline{653.7} & \\textbf{11.5} \\\\\n",
- "ercot & 1.979 & \\textbf{0.578} & \\underline{0.585} & 0.761 & 0.050 & \\underline{0.017} & \\textbf{0.016} & 0.039 & 0.040 & \\textbf{0.012} & \\underline{0.012} & 0.016 & 5119.8 & 3.7 & \\underline{2.0} & \\textbf{0.4} \\\\\n",
- "ett-small-15min & N/A & \\textbf{0.714} & \\underline{0.739} & 0.768 & N/A & \\textbf{0.083} & \\underline{0.088} & 0.143 & N/A & \\underline{0.110} & 0.116 & \\textbf{0.095} & N/A & 4.9 & \\underline{2.0} & \\textbf{0.4} \\\\\n",
- "ett-small-1h & 1.003 & \\textbf{0.737} & \\underline{0.805} & 0.932 & 0.123 & \\textbf{0.083} & \\underline{0.085} & 0.153 & 0.131 & \\textbf{0.091} & \\underline{0.100} & 0.103 & 4206.2 & 4.6 & \\underline{1.7} & \\textbf{0.2} \\\\\n",
- "exchange-rate & \\textbf{1.429} & 1.882 & 2.118 & \\underline{1.524} & \\textbf{0.007} & 0.011 & \\underline{0.010} & 0.016 & \\textbf{0.004} & 0.006 & 0.007 & \\underline{0.005} & 259.5 & 3.6 & \\underline{1.6} & \\textbf{0.1} \\\\\n",
- "fred-md & \\textbf{0.489} & 0.571 & \\underline{0.564} & 1.101 & 0.035 & \\textbf{0.029} & \\underline{0.029} & 0.082 & \\textbf{0.052} & \\underline{0.052} & 0.052 & 0.073 & 235.2 & 15.5 & \\underline{2.4} & \\textbf{0.9} \\\\\n",
- "hospital & \\textbf{0.748} & \\underline{0.810} & 0.815 & 0.921 & \\textbf{0.047} & \\underline{0.059} & 0.061 & 0.083 & \\textbf{0.086} & \\underline{0.093} & 0.093 & 0.105 & 78.9 & 20.4 & \\underline{2.9} & \\textbf{0.9} \\\\\n",
- "m5 & 1.638 & \\underline{1.433} & \\textbf{1.432} & 1.867 & \\textbf{0.516} & 0.535 & \\underline{0.532} & 1.457 & \\underline{0.716} & 0.818 & 0.827 & \\textbf{0.404} & 15599.2 & 8071.7 & \\underline{662.5} & \\textbf{27.1} \\\\\n",
- "nn5-daily-without-missing & \\underline{0.844} & \\textbf{0.824} & 0.900 & 1.011 & 0.146 & \\textbf{0.119} & \\underline{0.136} & 0.535 & \\underline{0.105} & \\textbf{0.103} & 0.112 & 0.132 & 235.9 & 57.0 & \\underline{5.8} & \\textbf{0.9} \\\\\n",
- "nn5-weekly & 0.974 & \\textbf{0.929} & \\underline{0.933} & 1.063 & \\textbf{0.077} & \\underline{0.078} & 0.079 & 0.256 & 0.060 & \\textbf{0.058} & \\underline{0.058} & 0.066 & 51.8 & 3.9 & \\underline{1.5} & \\textbf{0.8} \\\\\n",
- "traffic & 1.163 & \\underline{0.644} & \\textbf{0.640} & 1.785 & 0.353 & \\textbf{0.115} & \\underline{0.120} & 0.411 & 0.179 & \\textbf{0.077} & \\underline{0.082} & 0.244 & 12285.9 & 202.3 & \\underline{18.9} & \\textbf{2.6} \\\\\n",
- "weather & 0.675 & \\textbf{0.565} & \\underline{0.598} & 0.755 & 0.207 & \\textbf{0.122} & \\underline{0.128} & 0.702 & \\underline{0.312} & 0.327 & 0.334 & \\textbf{0.213} & 4159.2 & 873.4 & \\underline{92.7} & \\textbf{19.9} \\\\\n",
- "m1-monthly & \\textbf{1.034} & \\underline{1.089} & 1.193 & 1.314 & 0.121 & \\textbf{0.106} & \\underline{0.111} & 0.193 & \\textbf{0.073} & \\underline{0.076} & 0.082 & 0.087 & 85.0 & 36.1 & \\underline{4.2} & \\textbf{0.8} \\\\\n",
- "m1-quarterly & \\textbf{1.594} & \\underline{1.730} & 1.799 & 2.078 & \\textbf{0.070} & 0.089 & \\underline{0.084} & 0.143 & \\textbf{0.078} & \\underline{0.087} & 0.092 & 0.095 & 56.2 & 6.0 & \\underline{1.7} & \\textbf{0.8} \\\\\n",
- "m1-yearly & \\textbf{3.574} & \\underline{4.329} & 5.160 & 4.894 & \\textbf{0.092} & \\underline{0.117} & 0.119 & 0.150 & \\textbf{0.088} & \\underline{0.107} & 0.129 & 0.112 & 49.4 & 3.8 & \\underline{1.5} & \\textbf{0.9} \\\\\n",
- "m3-monthly & \\textbf{0.826} & \\underline{0.854} & 0.899 & 1.146 & \\textbf{0.078} & \\underline{0.083} & 0.084 & 0.166 & \\textbf{0.067} & \\underline{0.070} & 0.071 & 0.086 & 155.6 & 79.1 & \\underline{7.6} & \\textbf{0.9} \\\\\n",
- "m3-other & \\textbf{0.727} & \\underline{0.845} & 0.869 & 1.474 & \\textbf{0.027} & 0.033 & \\underline{0.032} & 0.070 & \\textbf{0.022} & \\underline{0.024} & 0.025 & 0.041 & 57.1 & 5.1 & \\underline{1.6} & \\textbf{0.8} \\\\\n",
- "m3-quarterly & \\textbf{1.164} & \\underline{1.170} & 1.292 & 1.425 & \\textbf{0.053} & \\underline{0.066} & 0.068 & 0.096 & \\textbf{0.046} & \\underline{0.048} & 0.053 & 0.055 & 60.6 & 14.5 & \\underline{2.3} & \\textbf{0.9} \\\\\n",
- "m3-yearly & \\textbf{2.704} & \\underline{3.062} & 3.486 & 3.172 & \\textbf{0.103} & \\underline{0.111} & 0.118 & 0.157 & \\textbf{0.080} & \\underline{0.089} & 0.101 & 0.089 & 53.4 & 9.4 & \\underline{1.9} & \\textbf{0.8} \\\\\n",
- "m4-quarterly & \\textbf{1.142} & \\underline{1.218} & 1.269 & 1.602 & \\textbf{0.062} & \\underline{0.068} & 0.068 & 0.113 & \\textbf{0.050} & \\underline{0.053} & 0.055 & 0.063 & 408.4 & 2404.0 & \\underline{170.8} & \\textbf{1.5} \\\\\n",
- "m4-yearly & \\textbf{3.002} & \\underline{3.559} & 3.750 & 3.966 & \\textbf{0.081} & \\underline{0.097} & 0.100 & 0.132 & \\textbf{0.067} & \\underline{0.081} & 0.086 & 0.082 & 197.2 & 1027.8 & \\underline{77.1} & \\textbf{1.3} \\\\\n",
- "tourism-monthly & \\textbf{1.441} & 1.783 & 1.974 & \\underline{1.631} & \\textbf{0.072} & \\underline{0.082} & 0.084 & 0.114 & \\textbf{0.092} & 0.121 & 0.132 & \\underline{0.108} & 343.5 & 55.7 & \\underline{5.7} & \\textbf{0.9} \\\\\n",
- "tourism-quarterly & \\textbf{1.506} & \\underline{1.636} & 1.800 & 1.699 & \\textbf{0.058} & \\underline{0.060} & 0.063 & 0.101 & \\textbf{0.072} & \\underline{0.081} & 0.090 & 0.083 & 69.3 & 12.6 & \\underline{2.2} & \\textbf{0.8} \\\\\n",
- "tourism-yearly & \\textbf{3.277} & 3.641 & 4.175 & \\underline{3.552} & \\textbf{0.111} & \\underline{0.126} & 0.142 & 0.160 & \\textbf{0.184} & \\underline{0.207} & 0.245 & 0.211 & 51.1 & 6.0 & \\underline{1.7} & \\textbf{0.8} \\\\\n",
+ "australian-electricity-demand & \\underline{1.115} & 1.184 & \\textbf{0.882} & 1.340 & 0.057 & \\underline{0.054} & \\textbf{0.042} & 0.098 & \\underline{0.051} & 0.051 & \\textbf{0.040} & 0.059 & 2716.9 & 4.7 & \\underline{2.1} & \\textbf{0.4} \\\\\n",
+ "car-parts-without-missing & 1.051 & \\underline{0.807} & \\textbf{0.803} & 1.120 & 1.132 & \\underline{1.059} & \\textbf{1.022} & 2.225 & \\underline{0.897} & 0.947 & 0.957 & \\textbf{0.310} & 37.9 & 61.4 & \\underline{5.3} & \\textbf{0.9} \\\\\n",
+ "cif-2016 & \\textbf{0.902} & \\underline{0.986} & 1.025 & 1.289 & 0.021 & \\textbf{0.015} & \\underline{0.019} & 0.056 & \\textbf{0.057} & \\underline{0.074} & 0.076 & 0.094 & 9.8 & 3.7 & \\underline{1.5} & \\textbf{0.6} \\\\\n",
+ "covid-deaths & \\textbf{5.248} & \\underline{6.540} & 6.555 & 7.762 & \\textbf{0.024} & \\underline{0.050} & 0.072 & 0.116 & \\textbf{0.054} & 0.205 & 0.204 & \\underline{0.093} & 22.0 & 32.0 & \\underline{3.9} & \\textbf{0.8} \\\\\n",
+ "dominick & 0.848 & \\underline{0.786} & \\textbf{0.782} & 0.828 & 0.529 & \\underline{0.414} & \\textbf{0.399} & 2.210 & \\underline{0.782} & 0.809 & 0.817 & \\textbf{0.160} & 874.0 & 8661.9 & \\underline{653.7} & \\textbf{12.4} \\\\\n",
+ "ercot & 1.356 & \\textbf{0.578} & \\underline{0.585} & 0.761 & 0.034 & \\underline{0.017} & \\textbf{0.016} & 0.039 & 0.027 & \\textbf{0.012} & \\underline{0.012} & 0.016 & 390.5 & 3.7 & \\underline{2.0} & \\textbf{0.4} \\\\\n",
+ "ett-small-15min & \\textbf{0.638} & \\underline{0.714} & 0.739 & 0.768 & \\textbf{0.083} & \\underline{0.083} & 0.088 & 0.143 & \\underline{0.100} & 0.110 & 0.116 & \\textbf{0.095} & 5034.8 & 4.9 & \\underline{2.0} & \\textbf{0.5} \\\\\n",
+ "ett-small-1h & 0.852 & \\textbf{0.737} & \\underline{0.805} & 0.932 & 0.109 & \\textbf{0.083} & \\underline{0.085} & 0.153 & 0.117 & \\textbf{0.091} & \\underline{0.100} & 0.103 & 332.1 & 4.6 & \\underline{1.7} & \\textbf{0.2} \\\\\n",
+ "exchange-rate & \\textbf{1.407} & 1.882 & 2.118 & \\underline{1.524} & \\textbf{0.007} & 0.011 & \\underline{0.010} & 0.016 & \\textbf{0.004} & 0.006 & 0.007 & \\underline{0.005} & 9.4 & 3.6 & \\underline{1.6} & \\textbf{0.1} \\\\\n",
+ "fred-md & \\textbf{0.482} & 0.571 & \\underline{0.564} & 1.101 & 0.033 & \\textbf{0.029} & \\underline{0.029} & 0.082 & \\textbf{0.052} & \\underline{0.052} & 0.052 & 0.073 & 49.1 & 15.5 & \\underline{2.4} & \\textbf{0.8} \\\\\n",
+ "hospital & \\textbf{0.749} & \\underline{0.810} & 0.815 & 0.921 & \\textbf{0.047} & \\underline{0.059} & 0.061 & 0.083 & \\textbf{0.086} & \\underline{0.093} & 0.093 & 0.105 & 30.6 & 20.4 & \\underline{2.9} & \\textbf{0.8} \\\\\n",
+ "m5 & 1.638 & \\underline{1.433} & \\textbf{1.432} & 1.867 & \\textbf{0.516} & 0.535 & \\underline{0.532} & 1.457 & \\underline{0.716} & 0.818 & 0.827 & \\textbf{0.404} & 15082.5 & 8071.7 & \\underline{662.5} & \\textbf{29.3} \\\\\n",
+ "nn5-daily-without-missing & \\underline{0.842} & \\textbf{0.824} & 0.900 & 1.011 & 0.146 & \\textbf{0.119} & \\underline{0.136} & 0.535 & \\underline{0.105} & \\textbf{0.103} & 0.112 & 0.132 & 38.4 & 57.0 & \\underline{5.8} & \\textbf{0.8} \\\\\n",
+ "nn5-weekly & 0.974 & \\textbf{0.929} & \\underline{0.933} & 1.063 & \\textbf{0.077} & \\underline{0.078} & 0.079 & 0.256 & 0.060 & \\textbf{0.058} & \\underline{0.058} & 0.066 & 5.8 & 3.9 & \\underline{1.5} & \\textbf{0.8} \\\\\n",
+ "traffic & 1.089 & \\underline{0.644} & \\textbf{0.640} & 1.785 & 0.341 & \\textbf{0.115} & \\underline{0.120} & 0.411 & 0.161 & \\textbf{0.077} & \\underline{0.082} & 0.244 & 4173.1 & 202.3 & \\underline{18.9} & \\textbf{3.1} \\\\\n",
+ "weather & 0.691 & \\textbf{0.565} & \\underline{0.598} & 0.755 & 0.201 & \\textbf{0.122} & \\underline{0.128} & 0.702 & \\underline{0.314} & 0.327 & 0.334 & \\textbf{0.213} & 1108.5 & 873.4 & \\underline{92.7} & \\textbf{22.1} \\\\\n",
+ "m1-monthly & \\textbf{1.034} & \\underline{1.089} & 1.193 & 1.314 & 0.121 & \\textbf{0.106} & \\underline{0.111} & 0.193 & \\textbf{0.073} & \\underline{0.076} & 0.082 & 0.087 & 27.7 & 36.1 & \\underline{4.2} & \\textbf{0.8} \\\\\n",
+ "m1-quarterly & \\textbf{1.594} & \\underline{1.730} & 1.799 & 2.078 & \\textbf{0.070} & 0.089 & \\underline{0.084} & 0.143 & \\textbf{0.078} & \\underline{0.087} & 0.092 & 0.095 & 7.2 & 6.0 & \\underline{1.7} & \\textbf{0.8} \\\\\n",
+ "m1-yearly & \\textbf{3.574} & \\underline{4.329} & 5.160 & 4.894 & \\textbf{0.092} & \\underline{0.117} & 0.119 & 0.150 & \\textbf{0.088} & \\underline{0.107} & 0.129 & 0.112 & 5.7 & 3.8 & \\underline{1.5} & \\textbf{0.8} \\\\\n",
+ "m3-monthly & \\textbf{0.827} & \\underline{0.854} & 0.899 & 1.146 & \\textbf{0.078} & \\underline{0.083} & 0.084 & 0.166 & \\textbf{0.067} & \\underline{0.070} & 0.071 & 0.086 & 69.0 & 79.1 & \\underline{7.6} & \\textbf{0.9} \\\\\n",
+ "m3-other & \\textbf{0.727} & \\underline{0.845} & 0.869 & 1.474 & \\textbf{0.027} & 0.033 & \\underline{0.032} & 0.070 & \\textbf{0.022} & \\underline{0.024} & 0.025 & 0.041 & 7.4 & 5.1 & \\underline{1.6} & \\textbf{0.8} \\\\\n",
+ "m3-quarterly & \\textbf{1.164} & \\underline{1.170} & 1.292 & 1.425 & \\textbf{0.053} & \\underline{0.066} & 0.068 & 0.096 & \\textbf{0.046} & \\underline{0.048} & 0.053 & 0.055 & 12.7 & 14.5 & \\underline{2.3} & \\textbf{0.8} \\\\\n",
+ "m3-yearly & \\textbf{2.706} & \\underline{3.062} & 3.486 & 3.172 & \\textbf{0.103} & \\underline{0.111} & 0.118 & 0.157 & \\textbf{0.080} & \\underline{0.089} & 0.101 & 0.089 & 8.6 & 9.4 & \\underline{1.9} & \\textbf{0.8} \\\\\n",
+ "m4-quarterly & \\textbf{1.142} & \\underline{1.218} & 1.269 & 1.602 & \\textbf{0.062} & \\underline{0.068} & 0.068 & 0.113 & \\textbf{0.050} & \\underline{0.053} & 0.055 & 0.063 & 331.3 & 2404.0 & \\underline{170.8} & \\textbf{1.6} \\\\\n",
+ "m4-yearly & \\textbf{3.003} & \\underline{3.559} & 3.750 & 3.966 & \\textbf{0.081} & \\underline{0.097} & 0.100 & 0.132 & \\textbf{0.067} & \\underline{0.081} & 0.086 & 0.082 & 117.8 & 1027.8 & \\underline{77.1} & \\textbf{1.3} \\\\\n",
+ "tourism-monthly & \\textbf{1.441} & 1.783 & 1.974 & \\underline{1.631} & \\textbf{0.072} & \\underline{0.082} & 0.084 & 0.114 & \\textbf{0.092} & 0.121 & 0.132 & \\underline{0.108} & 151.1 & 55.7 & \\underline{5.7} & \\textbf{0.9} \\\\\n",
+ "tourism-quarterly & \\textbf{1.506} & \\underline{1.636} & 1.800 & 1.699 & \\textbf{0.058} & \\underline{0.060} & 0.063 & 0.101 & \\textbf{0.072} & \\underline{0.081} & 0.090 & 0.083 & 16.8 & 12.6 & \\underline{2.2} & \\textbf{0.8} \\\\\n",
+ "tourism-yearly & \\textbf{3.277} & 3.641 & 4.175 & \\underline{3.552} & \\textbf{0.111} & \\underline{0.126} & 0.142 & 0.160 & \\textbf{0.184} & \\underline{0.207} & 0.245 & 0.211 & 7.1 & 6.0 & \\underline{1.7} & \\textbf{0.8} \\\\\n",
"\\end{tabular}\n",
"\n"
]
@@ -1226,11 +1262,18 @@
"source": [
"print(full_df.style.to_latex().replace(\"nan\", \"N/A\"))"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "nx",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -1244,9 +1287,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.14"
+ "version": "3.10.13"
}
},
"nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
}
diff --git a/experiments/amazon-chronos/results/complete-results.csv b/experiments/amazon-chronos/results/complete-results.csv
new file mode 100644
index 00000000..75576d5c
--- /dev/null
+++ b/experiments/amazon-chronos/results/complete-results.csv
@@ -0,0 +1,225 @@
+dataset,metric,model,value
+m5,mase,SeasonalNaive,1.8674323634298993
+m5,mase,StatisticalEnsemble,1.638152191603833
+m5,scaled_crps,SeasonalNaive,1.4574002623684272
+m5,scaled_crps,StatisticalEnsemble,0.5163249612630462
+m5,smape,SeasonalNaive,0.40357140598445107
+m5,smape,StatisticalEnsemble,0.7164228919710989
+m5,time,SeasonalNaive,29.344236135482788
+m5,time,StatisticalEnsemble,15082.48518037796
+dominick,mase,SeasonalNaive,0.8275396708993014
+dominick,mase,StatisticalEnsemble,0.8480031538431272
+dominick,scaled_crps,SeasonalNaive,2.210459061912306
+dominick,scaled_crps,StatisticalEnsemble,0.5288209674678412
+dominick,smape,SeasonalNaive,0.16016363526707927
+dominick,smape,StatisticalEnsemble,0.7823167704484896
+dominick,time,SeasonalNaive,12.37143611907959
+dominick,time,StatisticalEnsemble,874.0449843406677
+nn5_daily_without_missing,mase,SeasonalNaive,1.0112834698529345
+nn5_daily_without_missing,mase,StatisticalEnsemble,0.8423753193786091
+nn5_daily_without_missing,scaled_crps,SeasonalNaive,0.5351702879273849
+nn5_daily_without_missing,scaled_crps,StatisticalEnsemble,0.14553736570775094
+nn5_daily_without_missing,smape,SeasonalNaive,0.13235231518183066
+nn5_daily_without_missing,smape,StatisticalEnsemble,0.10488079515342492
+nn5_daily_without_missing,time,SeasonalNaive,0.8123540878295898
+nn5_daily_without_missing,time,StatisticalEnsemble,38.4502534866333
+nn5_weekly,mase,SeasonalNaive,1.062848269026432
+nn5_weekly,mase,StatisticalEnsemble,0.9735932153741834
+nn5_weekly,scaled_crps,SeasonalNaive,0.25582228822873815
+nn5_weekly,scaled_crps,StatisticalEnsemble,0.07720804470616706
+nn5_weekly,smape,SeasonalNaive,0.06633929067482891
+nn5_weekly,smape,StatisticalEnsemble,0.06025986107271241
+nn5_weekly,time,SeasonalNaive,0.8171298503875732
+nn5_weekly,time,StatisticalEnsemble,5.758360147476196
+traffic,mase,SeasonalNaive,1.7853234976926748
+traffic,mase,StatisticalEnsemble,1.0892706069210114
+traffic,scaled_crps,SeasonalNaive,0.41143591353383
+traffic,scaled_crps,StatisticalEnsemble,0.3414031320024622
+traffic,smape,SeasonalNaive,0.2438530051030199
+traffic,smape,StatisticalEnsemble,0.16081715430023405
+traffic,time,SeasonalNaive,3.057039260864258
+traffic,time,StatisticalEnsemble,4173.05211687088
+weather,mase,SeasonalNaive,0.754604010525011
+weather,mase,StatisticalEnsemble,0.6914725865126918
+weather,scaled_crps,SeasonalNaive,0.7023248754552787
+weather,scaled_crps,StatisticalEnsemble,0.20146415997944236
+weather,smape,SeasonalNaive,0.21257362636712376
+weather,smape,StatisticalEnsemble,0.31372190193744925
+weather,time,SeasonalNaive,22.064918279647827
+weather,time,StatisticalEnsemble,1108.4931182861328
+australian_electricity_demand,mase,SeasonalNaive,1.3403993825206266
+australian_electricity_demand,mase,StatisticalEnsemble,1.1151674787713628
+australian_electricity_demand,scaled_crps,SeasonalNaive,0.09814662393134205
+australian_electricity_demand,scaled_crps,StatisticalEnsemble,0.05735046591017824
+australian_electricity_demand,smape,SeasonalNaive,0.058815216824657776
+australian_electricity_demand,smape,StatisticalEnsemble,0.05083634433366544
+australian_electricity_demand,time,SeasonalNaive,0.3602612018585205
+australian_electricity_demand,time,StatisticalEnsemble,2716.8693885803223
+car_parts_without_missing,mase,SeasonalNaive,1.1201381265856145
+car_parts_without_missing,mase,StatisticalEnsemble,1.050660105202646
+car_parts_without_missing,scaled_crps,SeasonalNaive,2.2252592727461242
+car_parts_without_missing,scaled_crps,StatisticalEnsemble,1.1320439488874523
+car_parts_without_missing,smape,SeasonalNaive,0.31018504435473626
+car_parts_without_missing,smape,StatisticalEnsemble,0.8966779513784662
+car_parts_without_missing,time,SeasonalNaive,0.9146387577056884
+car_parts_without_missing,time,StatisticalEnsemble,37.927063941955566
+cif_2016,mase,SeasonalNaive,1.2892906338147911
+cif_2016,mase,StatisticalEnsemble,0.9022365251221355
+cif_2016,scaled_crps,SeasonalNaive,0.0562778589590996
+cif_2016,scaled_crps,StatisticalEnsemble,0.020951705353389735
+cif_2016,smape,SeasonalNaive,0.094358553312353
+cif_2016,smape,StatisticalEnsemble,0.05749614987192558
+cif_2016,time,SeasonalNaive,0.6034519672393799
+cif_2016,time,StatisticalEnsemble,9.755268335342407
+covid_deaths,mase,SeasonalNaive,7.7623797482537915
+covid_deaths,mase,StatisticalEnsemble,5.247590431278719
+covid_deaths,scaled_crps,SeasonalNaive,0.11604983030613081
+covid_deaths,scaled_crps,StatisticalEnsemble,0.024490948114393132
+covid_deaths,smape,SeasonalNaive,0.09313273813541141
+covid_deaths,smape,StatisticalEnsemble,0.053916857464500395
+covid_deaths,time,SeasonalNaive,0.8153104782104492
+covid_deaths,time,StatisticalEnsemble,22.0314781665802
+ercot,mase,SeasonalNaive,0.7613353868080769
+ercot,mase,StatisticalEnsemble,1.3562494003539405
+ercot,scaled_crps,SeasonalNaive,0.03946121626919911
+ercot,scaled_crps,StatisticalEnsemble,0.034150258315336315
+ercot,smape,SeasonalNaive,0.015630707775037007
+ercot,smape,StatisticalEnsemble,0.026523641327514176
+ercot,time,SeasonalNaive,0.3966584205627441
+ercot,time,StatisticalEnsemble,390.5005609989166
+ett_small_15min,mase,SeasonalNaive,0.7677330344383062
+ett_small_15min,mase,StatisticalEnsemble,0.6382274420888457
+ett_small_15min,scaled_crps,SeasonalNaive,0.1430347714870558
+ett_small_15min,scaled_crps,StatisticalEnsemble,0.08269271740545785
+ett_small_15min,smape,SeasonalNaive,0.09459814128105122
+ett_small_15min,smape,StatisticalEnsemble,0.10039187891984028
+ett_small_15min,time,SeasonalNaive,0.4571559429168701
+ett_small_15min,time,StatisticalEnsemble,5034.763110637665
+ett_small_1h,mase,SeasonalNaive,0.9316203232327729
+ett_small_1h,mase,StatisticalEnsemble,0.8519611234196047
+ett_small_1h,scaled_crps,SeasonalNaive,0.15305326839468023
+ett_small_1h,scaled_crps,StatisticalEnsemble,0.10893023778108452
+ett_small_1h,smape,SeasonalNaive,0.10259586641393585
+ett_small_1h,smape,StatisticalEnsemble,0.1171297505877785
+ett_small_1h,time,SeasonalNaive,0.2092659473419189
+ett_small_1h,time,StatisticalEnsemble,332.1302688121796
+exchange_rate,mase,SeasonalNaive,1.5238482740215735
+exchange_rate,mase,StatisticalEnsemble,1.4067630220218135
+exchange_rate,scaled_crps,SeasonalNaive,0.01577147847394285
+exchange_rate,scaled_crps,StatisticalEnsemble,0.007234723993796653
+exchange_rate,smape,SeasonalNaive,0.004661980297476255
+exchange_rate,smape,StatisticalEnsemble,0.0044398183280252015
+exchange_rate,time,SeasonalNaive,0.0944149494171142
+exchange_rate,time,StatisticalEnsemble,9.35352873802185
+fred_md,mase,SeasonalNaive,1.1008000208784752
+fred_md,mase,StatisticalEnsemble,0.4818602144932258
+fred_md,scaled_crps,SeasonalNaive,0.08215697114580049
+fred_md,scaled_crps,StatisticalEnsemble,0.03272829800542708
+fred_md,smape,SeasonalNaive,0.0729485559677336
+fred_md,smape,StatisticalEnsemble,0.05220994480946772
+fred_md,time,SeasonalNaive,0.8469631671905518
+fred_md,time,StatisticalEnsemble,49.06454372406006
+hospital,mase,SeasonalNaive,0.9205278258071787
+hospital,mase,StatisticalEnsemble,0.7485655129982478
+hospital,scaled_crps,SeasonalNaive,0.08269508339071587
+hospital,scaled_crps,StatisticalEnsemble,0.046899209602966556
+hospital,smape,SeasonalNaive,0.10512677152235944
+hospital,smape,StatisticalEnsemble,0.0859826808296096
+hospital,time,SeasonalNaive,0.8399276733398438
+hospital,time,StatisticalEnsemble,30.61531782150269
+m1_yearly,mase,SeasonalNaive,4.894322198189649
+m1_yearly,mase,StatisticalEnsemble,3.574145484272531
+m1_yearly,scaled_crps,SeasonalNaive,0.14966361102972647
+m1_yearly,scaled_crps,StatisticalEnsemble,0.0922698712661802
+m1_yearly,smape,SeasonalNaive,0.11216119472992997
+m1_yearly,smape,StatisticalEnsemble,0.08812465232291931
+m1_yearly,time,SeasonalNaive,0.7850766181945801
+m1_yearly,time,StatisticalEnsemble,5.73260760307312
+m1_quarterly,mase,SeasonalNaive,2.0775365099320915
+m1_quarterly,mase,StatisticalEnsemble,1.5939583404585844
+m1_quarterly,scaled_crps,SeasonalNaive,0.14308704311173895
+m1_quarterly,scaled_crps,StatisticalEnsemble,0.06983964327851233
+m1_quarterly,smape,SeasonalNaive,0.0947186951123695
+m1_quarterly,smape,StatisticalEnsemble,0.07840308519164578
+m1_quarterly,time,SeasonalNaive,0.7976851463317871
+m1_quarterly,time,StatisticalEnsemble,7.182486057281494
+m1_monthly,mase,SeasonalNaive,1.3144614942308708
+m1_monthly,mase,StatisticalEnsemble,1.0339353928765094
+m1_monthly,scaled_crps,SeasonalNaive,0.1929014674058537
+m1_monthly,scaled_crps,StatisticalEnsemble,0.12133830358487802
+m1_monthly,smape,SeasonalNaive,0.08654688593844032
+m1_monthly,smape,StatisticalEnsemble,0.0730905532066498
+m1_monthly,time,SeasonalNaive,0.8262171745300293
+m1_monthly,time,StatisticalEnsemble,27.710077047348022
+m3_yearly,mase,SeasonalNaive,3.1717102343239496
+m3_yearly,mase,StatisticalEnsemble,2.7060074099502085
+m3_yearly,scaled_crps,SeasonalNaive,0.15694102939190013
+m3_yearly,scaled_crps,StatisticalEnsemble,0.10307027528088823
+m3_yearly,smape,SeasonalNaive,0.08939945244930989
+m3_yearly,smape,StatisticalEnsemble,0.07999691218033288
+m3_yearly,time,SeasonalNaive,0.8121771812438965
+m3_yearly,time,StatisticalEnsemble,8.576204299926758
+m3_quarterly,mase,SeasonalNaive,1.425343782675886
+m3_quarterly,mase,StatisticalEnsemble,1.16376103880926
+m3_quarterly,scaled_crps,SeasonalNaive,0.09558308169170797
+m3_quarterly,scaled_crps,StatisticalEnsemble,0.05341428588839114
+m3_quarterly,smape,SeasonalNaive,0.05532565657960303
+m3_quarterly,smape,StatisticalEnsemble,0.04614976537182415
+m3_quarterly,time,SeasonalNaive,0.8202786445617676
+m3_quarterly,time,StatisticalEnsemble,12.746824741363524
+m3_monthly,mase,SeasonalNaive,1.1462045740727513
+m3_monthly,mase,StatisticalEnsemble,0.8267793470895428
+m3_monthly,scaled_crps,SeasonalNaive,0.1657581291619502
+m3_monthly,scaled_crps,StatisticalEnsemble,0.07762084692788024
+m3_monthly,smape,SeasonalNaive,0.0861955076505472
+m3_monthly,smape,StatisticalEnsemble,0.06722927141202216
+m3_monthly,time,SeasonalNaive,0.9021949768066406
+m3_monthly,time,StatisticalEnsemble,69.00676345825195
+m3_other,mase,SeasonalNaive,1.4741669727228415
+m3_other,mase,StatisticalEnsemble,0.7270561697641876
+m3_other,scaled_crps,SeasonalNaive,0.06994385450714162
+m3_other,scaled_crps,StatisticalEnsemble,0.026865030394171055
+m3_other,smape,SeasonalNaive,0.041276500014025615
+m3_other,smape,StatisticalEnsemble,0.021632279162270698
+m3_other,time,SeasonalNaive,0.7998371124267578
+m3_other,time,StatisticalEnsemble,7.380923748016357
+tourism_yearly,mase,SeasonalNaive,3.552009714599977
+tourism_yearly,mase,StatisticalEnsemble,3.277077062185471
+tourism_yearly,scaled_crps,SeasonalNaive,0.15974723271821942
+tourism_yearly,scaled_crps,StatisticalEnsemble,0.11125194688603007
+tourism_yearly,smape,SeasonalNaive,0.21083326227292737
+tourism_yearly,smape,StatisticalEnsemble,0.18384696850918383
+tourism_yearly,time,SeasonalNaive,0.8069603443145752
+tourism_yearly,time,StatisticalEnsemble,7.067137002944946
+tourism_quarterly,mase,SeasonalNaive,1.6989892602245846
+tourism_quarterly,mase,StatisticalEnsemble,1.5058255429273564
+tourism_quarterly,scaled_crps,SeasonalNaive,0.10142968168460322
+tourism_quarterly,scaled_crps,StatisticalEnsemble,0.0576224860295668
+tourism_quarterly,smape,SeasonalNaive,0.08304859164472883
+tourism_quarterly,smape,StatisticalEnsemble,0.07237160342871483
+tourism_quarterly,time,SeasonalNaive,0.8143134117126465
+tourism_quarterly,time,StatisticalEnsemble,16.84568691253662
+tourism_monthly,mase,SeasonalNaive,1.6309399914977092
+tourism_monthly,mase,StatisticalEnsemble,1.4409102971283867
+tourism_monthly,scaled_crps,SeasonalNaive,0.11447973197962824
+tourism_monthly,scaled_crps,StatisticalEnsemble,0.07190242570828337
+tourism_monthly,smape,SeasonalNaive,0.10834946268820275
+tourism_monthly,smape,StatisticalEnsemble,0.0915893066611628
+tourism_monthly,time,SeasonalNaive,0.864677906036377
+tourism_monthly,time,StatisticalEnsemble,151.14043617248535
+m4_yearly,mase,SeasonalNaive,3.965954942439523
+m4_yearly,mase,StatisticalEnsemble,3.002568369690919
+m4_yearly,scaled_crps,SeasonalNaive,0.13235538778860942
+m4_yearly,scaled_crps,StatisticalEnsemble,0.08139201567816129
+m4_yearly,smape,SeasonalNaive,0.0817671723808375
+m4_yearly,smape,StatisticalEnsemble,0.06748203578862982
+m4_yearly,time,SeasonalNaive,1.3356640338897705
+m4_yearly,time,StatisticalEnsemble,117.77939891815186
+m4_quarterly,mase,SeasonalNaive,1.6022471757947163
+m4_quarterly,mase,StatisticalEnsemble,1.1424438068562632
+m4_quarterly,scaled_crps,SeasonalNaive,0.11284029920261848
+m4_quarterly,scaled_crps,StatisticalEnsemble,0.061619570590469346
+m4_quarterly,smape,SeasonalNaive,0.06260683416118425
+m4_quarterly,smape,StatisticalEnsemble,0.049689605512879355
+m4_quarterly,time,SeasonalNaive,1.6325972080230713
+m4_quarterly,time,StatisticalEnsemble,331.26417803764343
diff --git a/experiments/amazon-chronos/src/eval_utils/statsforecast_pipeline.py b/experiments/amazon-chronos/src/eval_utils/statsforecast_pipeline.py
index a7a2f7e3..0bedbbcf 100644
--- a/experiments/amazon-chronos/src/eval_utils/statsforecast_pipeline.py
+++ b/experiments/amazon-chronos/src/eval_utils/statsforecast_pipeline.py
@@ -72,7 +72,7 @@ def run_statistical_ensemble(
freq: str,
seasonality: int,
quantiles: List[float],
- max_context_length: int = 5_000,
+ max_context_length: int = 2_000,
) -> Tuple[pd.DataFrame, float, str]:
os.environ["NIXTLA_ID_AS_COL"] = "true"
models = [
@@ -82,14 +82,11 @@ def run_statistical_ensemble(
DynamicOptimizedTheta(season_length=seasonality),
]
init_time = time()
- series_per_core = 15
- n_series = train_df["unique_id"].nunique()
- n_jobs = max(1, min(n_series // series_per_core, os.cpu_count()))
sf = StatsForecast(
models=models,
fallback_model=SeasonalNaive(season_length=seasonality),
freq=freq,
- n_jobs=n_jobs,
+ n_jobs=-1,
)
# Shorten all time series to at most max_context_length to avoid extremely long runtime
train_df = train_df.groupby("unique_id", sort=False, as_index=False).tail(
diff --git a/experiments/amazon-chronos/src/eval_utils/utils.py b/experiments/amazon-chronos/src/eval_utils/utils.py
index 8c9c4127..712d395a 100644
--- a/experiments/amazon-chronos/src/eval_utils/utils.py
+++ b/experiments/amazon-chronos/src/eval_utils/utils.py
@@ -195,8 +195,10 @@ def evaluate_models(self, models: List[str]) -> pd.DataFrame:
fcsts_df = pd.concat(fcsts_df, axis=1).reset_index()
fcsts_df["ds"] = pd.to_datetime(fcsts_df["ds"])
times_df = pd.concat(times_df)
- return evaluate_from_predictions(
- models=model, fcsts_df=fcsts_df, times_df=times_df
+ return self.evaluate_from_predictions(
+ models=model,
+ fcsts_df=fcsts_df,
+ times_df=times_df,
)
def evaluate_from_predictions(