From 9d2a6e98cea93c86340fd5874c6adb469846c88a Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Sat, 10 Dec 2022 15:42:07 +0100 Subject: [PATCH 1/4] boost sum_play_stats by looping over a list and binding it with data.table instead of dplyr --- DESCRIPTION | 2 +- NEWS.md | 1 + R/helper_scrape_nfl.R | 8 ++------ R/helper_tidy_play_stats.R | 4 ++-- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cfef67b1..6f039a6c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: nflfastR Title: Functions to Efficiently Access NFL Play by Play Data -Version: 4.5.0.9001 +Version: 4.5.0.9002 Authors@R: c(person(given = "Sebastian", family = "Carl", diff --git a/NEWS.md b/NEWS.md index 48e025e5..9b475a92 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ * New implementation of tests to be able to identify breaking changes in reverse dependencies * `calculate_standings()` no more freezes when computing standings from schedules where some games are missing results, i.e. upcoming games. (v4.5.0.9000) * Bug fix that caused problems with upcoming dplyr and tidyselect updates that weren't reverse compatible. +* Significant performance improvements of internal functions. (v4.5.0.9002) # nflfastR 4.5.0 diff --git a/R/helper_scrape_nfl.R b/R/helper_scrape_nfl.R index 1ff08ba1..c3576dc1 100644 --- a/R/helper_scrape_nfl.R +++ b/R/helper_scrape_nfl.R @@ -169,13 +169,9 @@ get_pbp_nfl <- function(id, dir = NULL, qs = FALSE, ...) { # if I don't put this here it breaks suppressWarnings( - pbp_stats <- - furrr::future_map(unique(stats$playId), function(x, s) { - sum_play_stats(x, s) - }, stats) + pbp_stats <- lapply(unique(stats$playId), sum_play_stats, stats) ) - - pbp_stats <- dplyr::bind_rows(pbp_stats) + pbp_stats <- data.table::rbindlist(pbp_stats) %>% tibble::as_tibble() combined <- game_info %>% dplyr::bind_cols(plays %>% dplyr::select(-"playStats", -"game_id")) %>% diff --git a/R/helper_tidy_play_stats.R b/R/helper_tidy_play_stats.R index a406f20d..cc9df756 100644 --- a/R/helper_tidy_play_stats.R +++ b/R/helper_tidy_play_stats.R @@ -13,9 +13,9 @@ # @param stats A dataframe including multiple rows for each play_Id holding # gsis stat ids and stats sum_play_stats <- function(play_Id, stats) { - play_stats <- stats %>% filter(.data$playId == play_Id) + play_stats <- stats[stats$playId == play_Id,] - row <- bind_cols(play_id = as.integer(play_Id), tidy_play_stats_row) + row <- c("play_id" = as.integer(play_Id), tidy_play_stats_row) for (index in seq_along(play_stats$playId)) { stat_id <- play_stats$statId[index] From 1cd686621f9b125ffba1e21bf7f69f0bebaf98e3 Mon Sep 17 00:00:00 2001 From: mrcaseb Date: Sat, 10 Dec 2022 15:46:10 +0100 Subject: [PATCH 2/4] apply to older seasons as well --- R/helper_scrape_gc.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/R/helper_scrape_gc.R b/R/helper_scrape_gc.R index 672e5e35..1cfbd9d5 100644 --- a/R/helper_scrape_gc.R +++ b/R/helper_scrape_gc.R @@ -128,12 +128,8 @@ get_pbp_gc <- function(gameId, dir = NULL, qs = FALSE, ...) { playStatSeq = "sequence" ) - - pbp_stats <- furrr::future_map(unique(stats$playId), function(x, s) { - sum_play_stats(x, s) - }, stats) - - pbp_stats <- dplyr::bind_rows(pbp_stats) + pbp_stats <- lapply(unique(stats$playId), sum_play_stats, stats) + pbp_stats <- data.table::rbindlist(pbp_stats) %>% tibble::as_tibble() # drive info d <- tibble::tibble(drives) %>% From 9cb6b422f1666f8dab2a293750bc54277d72eafc Mon Sep 17 00:00:00 2001 From: mrcaseb <38586519+mrcaseb@users.noreply.github.com> Date: Tue, 20 Dec 2022 11:54:22 +0100 Subject: [PATCH 3/4] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6f039a6c..f62ca657 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: nflfastR Title: Functions to Efficiently Access NFL Play by Play Data -Version: 4.5.0.9002 +Version: 4.5.0.9003 Authors@R: c(person(given = "Sebastian", family = "Carl", From e3182e1ef0ac844e946acb101d4962d9f4b5423e Mon Sep 17 00:00:00 2001 From: mrcaseb <38586519+mrcaseb@users.noreply.github.com> Date: Tue, 20 Dec 2022 11:54:47 +0100 Subject: [PATCH 4/4] update news --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 9b475a92..49275fa0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,7 +3,7 @@ * New implementation of tests to be able to identify breaking changes in reverse dependencies * `calculate_standings()` no more freezes when computing standings from schedules where some games are missing results, i.e. upcoming games. (v4.5.0.9000) * Bug fix that caused problems with upcoming dplyr and tidyselect updates that weren't reverse compatible. -* Significant performance improvements of internal functions. (v4.5.0.9002) +* Significant performance improvements of internal functions. (#402) # nflfastR 4.5.0