From 092fa9419ac3f20980738b682f33e5252e9a9b62 Mon Sep 17 00:00:00 2001 From: Akshay Easwaran Date: Mon, 9 Sep 2024 18:43:35 -0400 Subject: [PATCH] fixing issue with pass completions not having right yardage --- R/helper_pbp_add_yardage.R | 6 ++++- tests/testthat/test-cfbd_pbp_data.R | 40 +++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 tests/testthat/test-cfbd_pbp_data.R diff --git a/R/helper_pbp_add_yardage.R b/R/helper_pbp_add_yardage.R index 84303b0..3d9413e 100644 --- a/R/helper_pbp_add_yardage.R +++ b/R/helper_pbp_add_yardage.R @@ -86,10 +86,14 @@ add_yardage <- function(play_df) { stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+" )), .data$pass == 1 & - stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) ~ + stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) & + stringr::str_detect(.data$play_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~ as.numeric(stringr::str_extract( stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+" )), + .data$pass == 1 & + stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) ~ + yards_gained, # 2024 has games that don't have yards in the PBP text but do have them in the yards_gained field. 
TRUE ~ NA_real_ ) )
diff --git a/tests/testthat/test-cfbd_pbp_data.R b/tests/testthat/test-cfbd_pbp_data.R
new file mode 100644
index 0000000..f322247
--- /dev/null
+++ b/tests/testthat/test-cfbd_pbp_data.R
@@ -0,0 +1,40 @@
+test_that("2024 pbp handles completions properly", {
+  # For every selected completion, yds_receiving must equal yards_gained.
+  p <- cfbd_pbp_data(
+    year = 2024,
+    season_type = "regular",
+    week = 1,
+    team = "NC State",
+    play_type = "pass reception",
+    epa_wpa = TRUE
+  )
+
+  # Keep only NC State's receptions from game 401634299.
+  completions <- p %>%
+    dplyr::filter(game_id == 401634299 & play_type == "Pass Reception" & pos_team == "NC State") %>%
+    dplyr::mutate(same_same = (yards_gained == yds_receiving)) %>%
+    dplyr::select(yards_gained, yds_receiving, same_same)
+  # An empty selection would make the equality check below pass vacuously.
+  testthat::expect_gt(nrow(completions), 0)
+  testthat::expect_equal(sum(completions$same_same), nrow(completions))
+})
+
+test_that("base case 2023 pbp are already properly handled", {
+  # Same check against a 2023 game, queried for Georgia Tech.
+  p <- cfbd_pbp_data(
+    year = 2023,
+    season_type = "regular",
+    week = 2,
+    team = "Georgia Tech",
+    play_type = "pass reception",
+    epa_wpa = TRUE
+  )
+  # Keep only Georgia Tech's receptions from game 401525494.
+  completions <- p %>%
+    dplyr::filter(game_id == 401525494 & play_type == "Pass Reception" & pos_team == "Georgia Tech") %>%
+    dplyr::mutate(same_same = (yards_gained == yds_receiving)) %>%
+    dplyr::select(yards_gained, yds_receiving, same_same)
+  # An empty selection would make the equality check below pass vacuously.
+  testthat::expect_gt(nrow(completions), 0)
+  testthat::expect_equal(sum(completions$same_same), nrow(completions))
+})