diff --git a/DESCRIPTION b/DESCRIPTION index bfc56e8d..1ebe790e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: finnts Title: Microsoft Finance Time Series Forecasting Framework -Version: 0.4.0.9007 +Version: 0.4.0.9008 Authors@R: c(person(given = "Mike", family = "Tokic", @@ -24,7 +24,7 @@ License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Imports: cli, Cubist, diff --git a/NEWS.md b/NEWS.md index db04bf0d..24eaa754 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# finnts 0.4.0.9007 (DEVELOPMENT VERSION) +# finnts 0.4.0.9008 (DEVELOPMENT VERSION) ## Improvements @@ -6,7 +6,7 @@ - Allow global models for hierarchical forecasts - Multistep horizon forecasts for R1 recipe, listed as `multistep_horizon` within `prep_data()` - Always save the most accurate model average, regardless if selected as best model. This allows for improved scaling with large data sets. -- Automatically condense large forecasts (+10k time series) into smaller amount of files to make it easier to read forecast outputs +- Automatically condense large forecasts (+3k time series) into a smaller number of files to make it easier to read forecast outputs - Improved weighted MAPE calculation across all time series - Changed default for box_cox argument in `prep_data()` to FALSE - Support for spark version 3.4 in Azure Synapse/Fabric diff --git a/R/final_models.R b/R/final_models.R index c8cac696..23d7c9b3 100644 --- a/R/final_models.R +++ b/R/final_models.R @@ -618,7 +618,7 @@ final_models <- function(run_info, par_end(cl) # condense outputs into less files for larger runs - if (length(combo_list) > 10000) { + if (length(combo_list) > 3000) { cli::cli_progress_step("Condensing Forecasts") condense_data( diff --git a/R/hierarchy.R b/R/hierarchy.R index 544d802d..da7cda2e 100644 --- a/R/hierarchy.R +++ b/R/hierarchy.R @@ -112,6 +112,7 @@ prep_hierarchical_data <- function(input_data, .noexport = NULL ) 
%do% { temp_tbl <- input_data_adj %>% + tidyr::drop_na(tidyselect::all_of(regressor_var)) %>% dplyr::select(Date, tidyselect::all_of(value_level_iter), tidyselect::all_of(regressor_var)) %>% dplyr::distinct() @@ -144,6 +145,7 @@ prep_hierarchical_data <- function(input_data, # agg by total total_tbl <- input_data_adj %>% + tidyr::drop_na(tidyselect::all_of(regressor_var)) %>% dplyr::select(Date, value_level[[1]], tidyselect::all_of(regressor_var)) %>% dplyr::distinct() %>% dplyr::group_by(Date) %>%