Skip to content

Commit

Permalink
Merge pull request #104 from sportsdataverse/bugfix/2024-completions
Browse files Browse the repository at this point in the history
Fixing an issue where 2024 pass completions did not have the right yardage
  • Loading branch information
akeaswaran authored Sep 10, 2024
2 parents 55b2bb9 + d6d3d5a commit 0ef93da
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 1 deletion.
6 changes: 5 additions & 1 deletion R/helper_pbp_add_yardage.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,14 @@ add_yardage <- function(play_df) {
stringi::stri_extract_first_regex(.data$play_text, "(?<= for a loss of)[^,]+"), "\\d+"
)),
.data$pass == 1 &
stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) ~
stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) &
stringr::str_detect(.data$play_text, regex(" for \\d+ y\\w*ds?", ignore_case = TRUE)) ~
as.numeric(stringr::str_extract(
stringi::stri_extract_first_regex(.data$play_text, "(?<= for)[^,]+"), "\\d+"
)),
.data$pass == 1 &
stringr::str_detect(.data$play_text, regex("pass complete to", ignore_case = TRUE)) ~
yards_gained, # 2024 has games that don't have yards in the PBP text but do have them in the yards_gained field.
TRUE ~ NA_real_
)
)
Expand Down
40 changes: 40 additions & 0 deletions tests/testthat/test-cfbd_pbp_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
test_that("2024 pbp handles completions properly", {
  # Regression test: for every NC State pass reception in game 401634299
  # (2024, week 1), the play-level `yards_gained` must equal the
  # `yds_receiving` column produced by the EPA/WPA pipeline.
  # This test calls the live play-by-play endpoint, so it is skipped on CRAN.
  testthat::skip_on_cran()

  pbp <- cfbd_pbp_data(
    year = 2024,
    season_type = "regular",
    week = 1,
    team = "NC State",
    play_type = "pass reception",
    epa_wpa = TRUE
  )

  completions <- pbp %>%
    dplyr::filter(
      game_id == 401634299,
      play_type == "Pass Reception",
      pos_team == "NC State"
    ) %>%
    dplyr::mutate(same_same = (yards_gained == yds_receiving)) %>%
    dplyr::select(yards_gained, yds_receiving, same_same)

  # Guard against a vacuous pass: with zero rows, "all rows match" is
  # trivially true and the test would prove nothing.
  testthat::expect_gt(nrow(completions), 0)

  # A missing value in either column makes the comparison NA; report that
  # explicitly instead of letting it surface as an opaque NA-vs-count mismatch.
  testthat::expect_false(anyNA(completions$same_same))

  # Every completion with both values present must have matching yardage.
  testthat::expect_true(all(completions$same_same, na.rm = TRUE))
})

test_that("base case 2023 pbp are already properly handled", {
  # Baseline check: for every Georgia Tech pass reception in game 401525494
  # (2023, week 2), the play-level `yards_gained` must equal the
  # `yds_receiving` column produced by the EPA/WPA pipeline.
  # This test calls the live play-by-play endpoint, so it is skipped on CRAN.
  testthat::skip_on_cran()

  pbp <- cfbd_pbp_data(
    year = 2023,
    season_type = "regular",
    week = 2,
    team = "Georgia Tech",
    play_type = "pass reception",
    epa_wpa = TRUE
  )

  completions <- pbp %>%
    dplyr::filter(
      game_id == 401525494,
      play_type == "Pass Reception",
      pos_team == "Georgia Tech"
    ) %>%
    dplyr::mutate(same_same = (yards_gained == yds_receiving)) %>%
    dplyr::select(yards_gained, yds_receiving, same_same)

  # Guard against a vacuous pass: with zero rows, "all rows match" is
  # trivially true and the test would prove nothing.
  testthat::expect_gt(nrow(completions), 0)

  # A missing value in either column makes the comparison NA; report that
  # explicitly instead of letting it surface as an opaque NA-vs-count mismatch.
  testthat::expect_false(anyNA(completions$same_same))

  # Every completion with both values present must have matching yardage.
  testthat::expect_true(all(completions$same_same, na.rm = TRUE))
})

0 comments on commit 0ef93da

Please sign in to comment.