config-i1 · config-i1 · Nov 1, 2023 · Sep 27, 2023 · Sep 27, 2023 · Sep 30, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: smooth
 Type: Package
 Title: Forecasting Using State Space Models
-Version: 4.0.0
-Date: 2023-09-15
+Version: 4.0.1.41006
+Date: 2023-11-01
 Authors@R: person("Ivan", "Svetunkov", email = "ivan@svetunkov.ru", role = c("aut", "cre"),
                   comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK")
 URL: https://github.com/config-i1/smooth

diff --git a/NEWS b/NEWS
@@ -1,3 +1,16 @@
+smooth v4.0.1 (Release data: 2023-11-01)
+=======
+
+Changes:
+* Initialisation of ARIMA is now more precise, both for "optimal" and "backcasting", leading to a better fit of the model to the data. This was done using more meaningful initial values of states and by cutting the small step of refitting the head states in ARIMA with backcasting.
+
+Bugfixes:
+* Fixed a bug with multicov.adam() and simulated covariance matrix.
+* adam() ARIMA and msarima() with backcasting would count number of components towards the number of estimated parameters.
+* adam() would not remove components of ETS correctly in cases of small samples (e.g. obs == 2*frequency).
+* Fixed the code of sma() - there was a bug in the architecture resulting in wrong fitting.
+
+
 smooth v4.0.0 (Release data: 2023-09-15)
 =======
 

diff --git a/R/adam-sma.R b/R/adam-sma.R
@@ -127,6 +127,12 @@ sma <- function(y, order=NULL, ic=c("AICc","AIC","BIC","BICc"),
     }
     else{
         model <- ellipsis$model;
+
+        if(inherits(y,"Mdata")){
+            h <- y$h;
+            holdout <- TRUE;
+            y <- ts(c(y$x,y$xx),start=start(y$x),frequency=frequency(y$x));
+        }
     }
 
     # If a previous model provided as a model, write down the variables
@@ -190,35 +196,22 @@ sma <- function(y, order=NULL, ic=c("AICc","AIC","BIC","BICc"),
     ot[] <- 1;
 
     CreatorSMA <- function(order){
-        lagsModelAll <- as.matrix(rep(1,order));
-        lagsModelMax <- 1;
-        obsStates <- obsInSample+1;
-
-        # Create ADAM profiles
-        profilesRecentTable <- matrix(0,length(lagsModelAll),lagsModelMax,
-                                      dimnames=list(lagsModelAll,NULL));
-        # Create the matrix of observed profiles indices
-        profilesObservedTable <- matrix((1:order)-1,length(lagsModelAll),obsAll+lagsModelMax,
-                                        dimnames=list(lagsModelAll,NULL));
-
-        if(order>1){
-            matF <- rbind(cbind(rep(1/order,order-1),diag(order-1)),
-                          c(1/order,rep(0,order-1)));
-            matWt <- matrix(c(1,rep(0,order-1)),obsInSample,order,byrow=TRUE);
-        }
-        else{
-            matF <- matrix(1,1,1);
-            matWt <- matrix(1,obsInSample,1);
-        }
+        lagsModelAll <- matrix(1:order, ncol=1);
+        lagsModelMax <- max(lagsModelAll);
+        obsStates <- obsInSample+lagsModelMax;
+
+        # # Create ADAM profiles
+        adamProfiles <- adamProfileCreator(lagsModelAll, lagsModelMax, obsAll);
+
+        profilesObservedTable <- adamProfiles$observed;
+        profilesRecentTable <- adamProfiles$recent;
+        profilesRecentTable[order,1:order] <- mean(yInSample[1:order]);
+
+        # State space matrices
+        matF <- matrix(1/order,order,order);
+        matWt <- matrix(1,obsInSample,order,byrow=TRUE);
         vecG <- matrix(1/order,order);
-        matVt <- matrix(NA,order,obsStates);
-        matVt[1,1:order] <- rep(mean(yInSample[1:order]),order);
-        # if(order>1){
-        #     for(i in 2:order){
-        #         matVt[i,1:(order-i+1)] <- matVt[i-1,1:(order-i+1)+1] -
-        #             matVt[1,1:(order-i+1)] * matF[i-1,1];
-        #     }
-        # }
+        matVt <- matrix(0,order,obsStates);
 
         #### Fitter and the losses calculation ####
         adamFitted <- adamFitterWrap(matVt, matWt, matF, vecG,
@@ -234,7 +227,6 @@ sma <- function(y, order=NULL, ic=c("AICc","AIC","BIC","BICc"),
         ICValue <- icFunction(logLik);
 
         return(ICValue);
-        # return(list(order=order,cfObjective=cfObjective,ICValue=ICValue,logLik=logLik));
     }
 
 

diff --git a/R/adam.R b/R/adam.R
@@ -1161,6 +1161,20 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
                 if(initialArimaEstimate){
                     matVt[componentsNumberETS+1:componentsNumberARIMA, 1:initialArimaNumber] <-
                         switch(Etype, "A"=0, "M"=1);
+                    if(any(lags>1)){
+                        yDecomposition <- tail(msdecompose(yInSample,
+                                                           lags[lags!=1],
+                                                           type=switch(Etype,
+                                                                       "A"="additive",
+                                                                       "M"="multiplicative"))$seasonal,1)[[1]];
+                    }
+                    else{
+                        yDecomposition <- switch(Etype,
+                                                 "A"=mean(diff(yInSample)),
+                                                 "M"=exp(mean(diff(log(yInSample)))));
+                    }
+                    matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber] <-
+                        rep(yDecomposition,ceiling(initialArimaNumber/max(lags)))[1:initialArimaNumber];
                         # rep(yInSample[1:initialArimaNumber],each=componentsNumberARIMA);
 
                     # Failsafe mechanism in case the sample is too small
@@ -1448,7 +1462,8 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
             }
             # This is needed in order to propagate initials of ARIMA to all components
             else if(any(c(arEstimate,maEstimate))){
-                if(nrow(nonZeroARI)>0 && nrow(nonZeroARI)>=nrow(nonZeroMA)){
+                # if(nrow(nonZeroARI)>0 && nrow(nonZeroARI)>=nrow(nonZeroMA)){
+                # if(nrow(nonZeroARI)>0){
                     matVt[componentsNumberETS+nonZeroARI[,2], 1:initialArimaNumber] <-
                         switch(Etype,
                                "A"= arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%
@@ -1457,7 +1472,7 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
                                "M"=exp(arimaPolynomials$ariPolynomial[nonZeroARI[,1]] %*%
                                            t(log(matVt[componentsNumberETS+componentsNumberARIMA, 1:initialArimaNumber])) /
                                            tail(arimaPolynomials$ariPolynomial,1)));
-                }
+                # }
                 # else{
                 #     matVt[componentsNumberETS+nonZeroMA[,2],
                 #           1:initialArimaNumber] <-
@@ -1764,6 +1779,8 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
                 Bu[j+1:initialArimaNumber] <- Inf;
             }
             else{
+                # Make sure that ARIMA states are positive to avoid errors
+                B[j+1:initialArimaNumber] <- abs(B[j+1:initialArimaNumber]);
                 Bl[j+1:initialArimaNumber] <- 0;
                 Bu[j+1:initialArimaNumber] <- Inf;
             }
@@ -2064,6 +2081,7 @@ adam <- function(data, model="ZXZ", lags=c(frequency(data)), orders=list(ar=c(0)
         profilesRecentTable[] <- adamElements$matVt[,1:lagsModelMax];
         # print(round(B,3))
         # print(adamElements$vecG)
+        # print(profilesRecentTable)
 
         #### Fitter and the losses calculation ####
         adamFitted <- adamFitterWrap(adamElements$matVt, adamElements$matWt, adamElements$matF, adamElements$vecG,
@@ -5755,7 +5773,7 @@ plot.adam <- function(x, which=c(1,2,4,6), level=0.95, legend=FALSE,
             x$states <- cbind(actuals(x),x$states,residuals(x));
             colnames(x$states) <- statesNames;
             if(ncol(x$states)>10){
-                message("Too many states. Plotting them one by one on several graphs.");
+                message("Too many states. Plotting them one by one on several plots.");
                 if(is.null(ellipsis$main)){
                     ellipsisMain <- NULL;
                 }
@@ -10121,6 +10139,10 @@ multicov.adam <- function(object, type=c("analytical","empirical","simulated"),
                 yForecast[i] <- mean(ySimulated[i,],na.rm=TRUE);
             }
             ySimulated[i,] <- ySimulated[i,]-yForecast[i];
+            # If it is the multiplicative error, return epsilon_t
+            if(Etype=="M"){
+                ySimulated[i,] <- ySimulated[i,]/yForecast[i];
+            }
         }
 
         covarMat <- (ySimulated %*% t(ySimulated))/nsim;

diff --git a/R/adamGeneral.R b/R/adamGeneral.R
@@ -2497,7 +2497,7 @@ parametersChecker <- function(data, model, lags, formulaToUse, orders, constant=
         }
     }
 
-    if(arimaModel && obsNonzero < initialArimaNumber && !select){
+    if(arimaModel && obsNonzero < (initialType=="optimal")*initialArimaNumber && !select){
         warning(paste0("In-sample size is ",obsNonzero,", while number of ARIMA components is ",initialArimaNumber,
                        ". Cannot fit the model."),call.=FALSE)
         stop("Not enough observations for such a complicated model.",call.=FALSE);
@@ -2644,27 +2644,36 @@ parametersChecker <- function(data, model, lags, formulaToUse, orders, constant=
             else if(obsNonzero > (3 + nParamExo) && any(modelDo==c("estimate","use"))){
                 # We don't have enough observations for seasonal models with damped trend
                 if((obsNonzero <= (6 + lagsModelMax + 1 + nParamExo))){
-                    model <- model[!(nchar(model)==4 &
-                                         substr(model,nchar(model),nchar(model))=="A")];
-                    model <- model[!(nchar(model)==4 &
-                                         substr(model,nchar(model),nchar(model))=="M")];
+                    if(nchar(model)==4){
+                        model <- paste0(substr(model,1,2),substr(model,4,4));
+                    }
+                    # model <- model[!(nchar(model)==4 &
+                    #                      substr(model,nchar(model),nchar(model))=="A")];
+                    # model <- model[!(nchar(model)==4 &
+                    #                      substr(model,nchar(model),nchar(model))=="M")];
                 }
                 # We don't have enough observations for seasonal models with trend
                 if((obsNonzero <= (5 + lagsModelMax + 1 + nParamExo))){
-                    model <- model[!(substr(model,2,2)!="N" &
-                                         substr(model,nchar(model),nchar(model))!="N")];
+                    model <- paste0(substr(model,1,1),"N",substr(model,3,3));
+                    # model <- model[!(substr(model,2,2)!="N" &
+                    #                      substr(model,nchar(model),nchar(model))!="N")];
                 }
                 # We don't have enough observations for seasonal models
                 if(obsNonzero <= 2*lagsModelMax){
-                    model <- model[substr(model,nchar(model),nchar(model))=="N"];
+                    model <- paste0(substr(model,1,2),"N");
+                    # model <- model[substr(model,nchar(model),nchar(model))=="N"];
                 }
                 # We don't have enough observations for damped trend
                 if(obsNonzero <= (6 + nParamExo)){
-                    model <- model[nchar(model)!=4];
+                    if(nchar(model)==4){
+                        model <- paste0(substr(model,1,2),substr(model,4,4));
+                    }
+                    # model <- model[nchar(model)!=4];
                 }
                 # We don't have enough observations for any trend
                 if(obsNonzero <= (5 + nParamExo)){
-                    model <- model[substr(model,2,2)=="N"];
+                    model <- paste0(substr(model,1,1),"N",substr(model,3,3));
+                    # model <- model[substr(model,2,2)=="N"];
                 }
             }
             # Extreme cases of small samples
@@ -2790,7 +2799,7 @@ parametersChecker <- function(data, model, lags, formulaToUse, orders, constant=
                            "take more time to converge to the optimum. Consider either setting maxeval parameter ",
                            "to a higher value (e.g. maxeval=10000, which will take ~25 times more than this) ",
                            "or using initial='backcasting'."),
-                    call.=FALSE);
+                    call.=FALSE, immediate.=TRUE);
         }
     }
     else{

diff --git a/R/autoadam.R b/R/autoadam.R
@@ -503,6 +503,11 @@ auto.adam <- function(data, model="ZXZ", lags=c(frequency(data)),
             ##### Loop for differences #####
             # Prepare table with differences
             # expand.grid() can be used instead...
+            # iOrders <- as.matrix(cbind(expand.grid(lapply(iMax, seq, 0)), constant=1));
+            # Remove the row with all zeroes,  duplicate orders for the constant
+            # iOrders <- rbind(iOrders[-nrow(iOrders),],iOrders);
+            # First rows should not have constant
+            # iOrders[1:(floor(nrow(iOrders)/2)),ncol(iOrders)] <- 0;
             if(any(iMax!=0)){
                 iOrders[,1] <- rep(c(0:iMax[1]),times=prod(iMax[-1]+1));
                 if(ordersLength>1){

diff --git a/src/adamGeneral.cpp b/src/adamGeneral.cpp
@@ -106,13 +106,15 @@ List adamFitter(arma::mat &matrixVt, arma::mat const &matrixWt, arma::mat &matri
                                               vectorG, vecErrors(i-lagsModelMax));
             }
 
-            // Fill in the head of the series
-            for (int i=lagsModelMax-1; i>=0; i=i-1) {
-                profilesRecent(profilesObserved.col(i)) = adamFvalue(profilesRecent(profilesObserved.col(i)),
-                             matrixF, E, T, S, nETS, nNonSeasonal, nSeasonal, nArima, nComponents, constant);
+            // Fill in the head of the series.
+            // if(nArima==0){
+                for (int i=lagsModelMax-1; i>=0; i=i-1) {
+                    profilesRecent(profilesObserved.col(i)) = adamFvalue(profilesRecent(profilesObserved.col(i)),
+                                   matrixF, E, T, S, nETS, nNonSeasonal, nSeasonal, nArima, nComponents, constant);
 
-                matrixVt.col(i) = profilesRecent(profilesObserved.col(i));
-            }
+                    matrixVt.col(i) = profilesRecent(profilesObserved.col(i));
+                }
+            // }
 
             // Change back the specific element in the state vector
             if(T=='A'){

diff --git a/src/adamGeneral.h b/src/adamGeneral.h
@@ -1,7 +1,6 @@
 #include <RcppArmadillo.h>
 #include <iostream>
 #include <cmath>
-// [[Rcpp::depends(RcppArmadillo)]]
 
 using namespace Rcpp;
 

diff --git a/src/ssGeneral.h b/src/ssGeneral.h
@@ -1,7 +1,6 @@
 #include <RcppArmadillo.h>
 #include <iostream>
 #include <cmath>
-// [[Rcpp::depends(RcppArmadillo)]]
 
 using namespace Rcpp;