Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating the R code from the master #208

Merged
merged 13 commits into from
Nov 1, 2023
Merged
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: smooth
Type: Package
Title: Forecasting Using State Space Models
Version: 4.0.0
Date: 2023-09-15
Version: 4.0.1.41006
Date: 2023-11-01
Authors@R: person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK")
URL: https://github.com/config-i1/smooth
Expand Down
13 changes: 13 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
smooth v4.0.1 (Release data: 2023-11-01)
=======

Changes:
* Initialisation of ARIMA is now more precise, both for "optimal" and "backcasting", leading to a better fit of the model to the data. This was done using more meaningful initial values of states and by cutting the small step of refitting the head states in ARIMA with backcasting.

Bugfixes:
* Fixed a bug with multicov.adam() and simulated covariance matrix.
* adam() ARIMA and msarima() with backcasting would count number of components towards the number of estimated parameters.
* adam() would not remove components of ETS correctly in cases of small samples (e.g. obs == 2*frequency).
* Fixed the code of sma() - there was a bug in the architecture resulting in wrong fitting.


smooth v4.0.0 (Release data: 2023-09-15)
=======

Expand Down
50 changes: 21 additions & 29 deletions R/adam-sma.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,12 @@ sma <- function(y, order=NULL, ic=c("AICc","AIC","BIC","BICc"),
}
else{
model <- ellipsis$model;

if(inherits(y,"Mdata")){
h <- y$h;
holdout <- TRUE;
y <- ts(c(y$x,y$xx),start=start(y$x),frequency=frequency(y$x));
}
}

# If a previous model provided as a model, write down the variables
Expand Down Expand Up @@ -190,35 +196,22 @@ sma <- function(y, order=NULL, ic=c("AICc","AIC","BIC","BICc"),
ot[] <- 1;

CreatorSMA <- function(order){
lagsModelAll <- as.matrix(rep(1,order));
lagsModelMax <- 1;
obsStates <- obsInSample+1;

# Create ADAM profiles
profilesRecentTable <- matrix(0,length(lagsModelAll),lagsModelMax,
dimnames=list(lagsModelAll,NULL));
# Create the matrix of observed profiles indices
profilesObservedTable <- matrix((1:order)-1,length(lagsModelAll),obsAll+lagsModelMax,
dimnames=list(lagsModelAll,NULL));

if(order>1){
matF <- rbind(cbind(rep(1/order,order-1),diag(order-1)),
c(1/order,rep(0,order-1)));
matWt <- matrix(c(1,rep(0,order-1)),obsInSample,order,byrow=TRUE);
}
else{
matF <- matrix(1,1,1);
matWt <- matrix(1,obsInSample,1);
}
lagsModelAll <- matrix(1:order, ncol=1);
lagsModelMax <- max(lagsModelAll);
obsStates <- obsInSample+lagsModelMax;

# # Create ADAM profiles
adamProfiles <- adamProfileCreator(lagsModelAll, lagsModelMax, obsAll);

profilesObservedTable <- adamProfiles$observed;
profilesRecentTable <- adamProfiles$recent;
profilesRecentTable[order,1:order] <- mean(yInSample[1:order]);

# State space matrices
matF <- matrix(1/order,order,order);
matWt <- matrix(1,obsInSample,order,byrow=TRUE);
vecG <- matrix(1/order,order);
matVt <- matrix(NA,order,obsStates);
matVt[1,1:order] <- rep(mean(yInSample[1:order]),order);
# if(order>1){
# for(i in 2:order){
# matVt[i,1:(order-i+1)] <- matVt[i-1,1:(order-i+1)+1] -
# matVt[1,1:(order-i+1)] * matF[i-1,1];
# }
# }
matVt <- matrix(0,order,obsStates);

#### Fitter and the losses calculation ####
adamFitted <- adamFitterWrap(matVt, matWt, matF, vecG,
Expand All @@ -234,7 +227,6 @@ sma <- function(y, order=NULL, ic=c("AICc","AIC","BIC","BICc"),
ICValue <- icFunction(logLik);

return(ICValue);
# return(list(order=order,cfObjective=cfObjective,ICValue=ICValue,logLik=logLik));
}


Expand Down
28 changes: 25 additions & 3 deletions R/adam.R
Original file line number Diff line number Diff line change
Expand Up @@ -1161,6 +1161,20 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
if(initialArimaEstimate){
matVt[componentsNumberETS+1:componentsNumberARIMA, 1:initialArimaNumber] <-
switch(Etype, "A"=0, "M"=1);
if(any(lags>1)){
yDecomposition <- tail(msdecompose(yInSample,
lags[lags!=1],
type=switch(Etype,
"A"="additive",
"M"="multiplicative"))$seasonal,1)[[1]];
}
else{
yDecomposition <- switch(Etype,
"A"=mean(diff(yInSample)),
"M"=exp(mean(diff(log(yInSample)))));
}
matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber] <-
rep(yDecomposition,ceiling(initialArimaNumber/max(lags)))[1:initialArimaNumber];
# rep(yInSample[1:initialArimaNumber],each=componentsNumberARIMA);

# Failsafe mechanism in case the sample is too small
Expand Down Expand Up @@ -1448,7 +1462,8 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
}
# This is needed in order to propagate initials of ARIMA to all components
else if(any(c(arEstimate,maEstimate))){
if(nrow(nonZeroARI)>0 && nrow(nonZeroARI)>=nrow(nonZeroMA)){
# if(nrow(nonZeroARI)>0 && nrow(nonZeroARI)>=nrow(nonZeroMA)){
# if(nrow(nonZeroARI)>0){
matVt[componentsNumberETS+nonZeroARI[,2], 1:initialArimaNumber] <-
switch(Etype,
"A"= arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%
Expand All @@ -1457,7 +1472,7 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
"M"=exp(arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%
t(log(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber])) /
tail(arimaPolynomials$ariPolynomial,1)));
}
# }
# else{
# matVt[componentsNumberETS+nonZeroMA[,2],
# 1:initialArimaNumber] <-
Expand Down Expand Up @@ -1764,6 +1779,8 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
Bu[j+1:initialArimaNumber] <- Inf;
}
else{
# Make sure that ARIMA states are positive to avoid errors
B[j+1:initialArimaNumber] <- abs(B[j+1:initialArimaNumber]);
Bl[j+1:initialArimaNumber] <- 0;
Bu[j+1:initialArimaNumber] <- Inf;
}
Expand Down Expand Up @@ -2064,6 +2081,7 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
profilesRecentTable[] <- adamElements$matVt[,1:lagsModelMax];
# print(round(B,3))
# print(adamElements$vecG)
# print(profilesRecentTable)

#### Fitter and the losses calculation ####
adamFitted <- adamFitterWrap(adamElements$matVt, adamElements$matWt, adamElements$matF, adamElements$vecG,
Expand Down Expand Up @@ -5755,7 +5773,7 @@ plot.adam <- function(x, which=c(1,2,4,6), level=0.95, legend=FALSE,
x$states <- cbind(actuals(x),x$states,residuals(x));
colnames(x$states) <- statesNames;
if(ncol(x$states)>10){
message("Too many states. Plotting them one by one on several graphs.");
message("Too many states. Plotting them one by one on several plots.");
if(is.null(ellipsis$main)){
ellipsisMain <- NULL;
}
Expand Down Expand Up @@ -10121,6 +10139,10 @@ multicov.adam <- function(object, type=c("analytical","empirical","simulated"),
yForecast[i] <- mean(ySimulated[i,],na.rm=TRUE);
}
ySimulated[i,] <- ySimulated[i,]-yForecast[i];
# If it is the multiplicative error, return epsilon_t
if(Etype=="M"){
ySimulated[i,] <- ySimulated[i,]/yForecast[i];
}
}

covarMat <- (ySimulated %*% t(ySimulated))/nsim;
Expand Down
31 changes: 20 additions & 11 deletions R/adamGeneral.R
Original file line number Diff line number Diff line change
Expand Up @@ -2497,7 +2497,7 @@ parametersChecker <- function(data, model, lags, formulaToUse, orders, constant=
}
}

if(arimaModel && obsNonzero < initialArimaNumber && !select){
if(arimaModel && obsNonzero < (initialType=="optimal")*initialArimaNumber && !select){
warning(paste0("In-sample size is ",obsNonzero,", while number of ARIMA components is ",initialArimaNumber,
". Cannot fit the model."),call.=FALSE)
stop("Not enough observations for such a complicated model.",call.=FALSE);
Expand Down Expand Up @@ -2644,27 +2644,36 @@ parametersChecker <- function(data, model, lags, formulaToUse, orders, constant=
else if(obsNonzero > (3 + nParamExo) && any(modelDo==c("estimate","use"))){
# We don't have enough observations for seasonal models with damped trend
if((obsNonzero <= (6 + lagsModelMax + 1 + nParamExo))){
model <- model[!(nchar(model)==4 &
substr(model,nchar(model),nchar(model))=="A")];
model <- model[!(nchar(model)==4 &
substr(model,nchar(model),nchar(model))=="M")];
if(nchar(model)==4){
model <- paste0(substr(model,1,2),substr(model,4,4));
}
# model <- model[!(nchar(model)==4 &
# substr(model,nchar(model),nchar(model))=="A")];
# model <- model[!(nchar(model)==4 &
# substr(model,nchar(model),nchar(model))=="M")];
}
# We don't have enough observations for seasonal models with trend
if((obsNonzero <= (5 + lagsModelMax + 1 + nParamExo))){
model <- model[!(substr(model,2,2)!="N" &
substr(model,nchar(model),nchar(model))!="N")];
model <- paste0(substr(model,1,1),"N",substr(model,3,3));
# model <- model[!(substr(model,2,2)!="N" &
# substr(model,nchar(model),nchar(model))!="N")];
}
# We don't have enough observations for seasonal models
if(obsNonzero <= 2*lagsModelMax){
model <- model[substr(model,nchar(model),nchar(model))=="N"];
model <- paste0(substr(model,1,2),"N");
# model <- model[substr(model,nchar(model),nchar(model))=="N"];
}
# We don't have enough observations for damped trend
if(obsNonzero <= (6 + nParamExo)){
model <- model[nchar(model)!=4];
if(nchar(model)==4){
model <- paste0(substr(model,1,2),substr(model,4,4));
}
# model <- model[nchar(model)!=4];
}
# We don't have enough observations for any trend
if(obsNonzero <= (5 + nParamExo)){
model <- model[substr(model,2,2)=="N"];
model <- paste0(substr(model,1,1),"N",substr(model,3,3));
# model <- model[substr(model,2,2)=="N"];
}
}
# Extreme cases of small samples
Expand Down Expand Up @@ -2790,7 +2799,7 @@ parametersChecker <- function(data, model, lags, formulaToUse, orders, constant=
"take more time to converge to the optimum. Consider either setting maxeval parameter ",
"to a higher value (e.g. maxeval=10000, which will take ~25 times more than this) ",
"or using initial='backcasting'."),
call.=FALSE);
call.=FALSE, immediate.=TRUE);
}
}
else{
Expand Down
5 changes: 5 additions & 0 deletions R/autoadam.R
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,11 @@ auto.adam <- function(data, model="ZXZ", lags=c(frequency(data)),
##### Loop for differences #####
# Prepare table with differences
# expand.grid() can be used instead...
# iOrders <- as.matrix(cbind(expand.grid(lapply(iMax, seq, 0)), constant=1));
# Remove the row with all zeroes, duplicate orders for the constant
# iOrders <- rbind(iOrders[-nrow(iOrders),],iOrders);
# First rows should not have constant
# iOrders[1:(floor(nrow(iOrders)/2)),ncol(iOrders)] <- 0;
if(any(iMax!=0)){
iOrders[,1] <- rep(c(0:iMax[1]),times=prod(iMax[-1]+1));
if(ordersLength>1){
Expand Down
14 changes: 8 additions & 6 deletions src/adamGeneral.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,15 @@ List adamFitter(arma::mat &matrixVt, arma::mat const &matrixWt, arma::mat &matri
vectorG, vecErrors(i-lagsModelMax));
}

// Fill in the head of the series
for (int i=lagsModelMax-1; i>=0; i=i-1) {
profilesRecent(profilesObserved.col(i)) = adamFvalue(profilesRecent(profilesObserved.col(i)),
matrixF, E, T, S, nETS, nNonSeasonal, nSeasonal, nArima, nComponents, constant);
// Fill in the head of the series.
// if(nArima==0){
for (int i=lagsModelMax-1; i>=0; i=i-1) {
profilesRecent(profilesObserved.col(i)) = adamFvalue(profilesRecent(profilesObserved.col(i)),
matrixF, E, T, S, nETS, nNonSeasonal, nSeasonal, nArima, nComponents, constant);

matrixVt.col(i) = profilesRecent(profilesObserved.col(i));
}
matrixVt.col(i) = profilesRecent(profilesObserved.col(i));
}
// }

// Change back the specific element in the state vector
if(T=='A'){
Expand Down
1 change: 0 additions & 1 deletion src/adamGeneral.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include <RcppArmadillo.h>
#include <iostream>
#include <cmath>
// [[Rcpp::depends(RcppArmadillo)]]

using namespace Rcpp;

Expand Down
1 change: 0 additions & 1 deletion src/ssGeneral.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include <RcppArmadillo.h>
#include <iostream>
#include <cmath>
// [[Rcpp::depends(RcppArmadillo)]]

using namespace Rcpp;

Expand Down