diff --git a/DESCRIPTION b/DESCRIPTION index c1fab595..5843bf34 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: nflfastR Title: Functions to Efficiently Access NFL Play by Play Data -Version: 4.6.1.9003 +Version: 4.6.1.9004 Authors@R: c(person(given = "Sebastian", family = "Carl", diff --git a/NEWS.md b/NEWS.md index d083c0ab..6e10bb84 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ - The function `calculate_player_stats_def` now returns `season_type` if argument `weekly` is set to `TRUE` for consistency with the other player stats functions. (#455) - The function `missing_raw_pbp()` now allows filtering by season. (#457) - More robust handling of player IDs in `decode_player_ids()`. (#458) +- Fixed rare cases where the value of the `yrdln` variable didn't equal `"MID 50"` at midfield. (#459) +- Fixed rare cases where `drive_start_yard_line` missed the blank space between team name and yard line number. (#459) +- Fixed play description in some 1999 and 2000 games where the string "D.Holland" replaced the kick distance. (#459) # nflfastR 4.6.1 diff --git a/R/helper_add_nflscrapr_mutations.R b/R/helper_add_nflscrapr_mutations.R index 2ebc1722..7bdab2cf 100644 --- a/R/helper_add_nflscrapr_mutations.R +++ b/R/helper_add_nflscrapr_mutations.R @@ -23,7 +23,12 @@ add_nflscrapr_mutations <- function(pbp) { (.data$play_description == "END GAME" & is.na(.data$time)), "00:00", .data$time), time = dplyr::if_else(.data$play_description == 'GAME', "15:00", .data$time), # Create a column with the time in seconds remaining for the quarter: - quarter_seconds_remaining = time_to_seconds(.data$time) + quarter_seconds_remaining = time_to_seconds(.data$time), + play_description = dplyr::case_when( + stringr::str_detect(.data$play_description, "(?<=kicks )[:alpha:]{1,}.[:alpha:]{1,}(?= yards)") ~ + stringr::str_replace(.data$play_description, "(?<=kicks )[:alpha:]{1,}.[:alpha:]{1,}(?= yards)", as.character(.data$kick_distance)), + TRUE ~ .data$play_description + ) ) %>% #put plays in the right order dplyr::group_by(.data$game_id) %>% @@ -196,7 +201,7 @@ add_nflscrapr_mutations <- function(pbp) { .data$away_team, .data$home_team ), - yardline = dplyr::if_else(.data$yardline == "50", "MID 50", .data$yardline), + yardline = dplyr::if_else(stringr::str_detect(.data$yardline, "50"), "MID 50", .data$yardline), yardline = dplyr::if_else( nchar(.data$yardline) == 0 | is.null(.data$yardline) | .data$yardline == "NULL" | is.na(.data$yardline), dplyr::lead(.data$yardline), .data$yardline @@ -426,6 +431,23 @@ add_nflscrapr_mutations <- function(pbp) { 0, .data$away_timeout_used ) ) %>% + # replace empty strings in yard line variables + dplyr::mutate_at( + .vars = c("yardline", "drive_start_yard_line" ,"drive_end_yard_line"), + .funs = ~ dplyr::na_if(.x, "") + ) %>% + # fix cases where a yardline variable misses the blank space between team name + # and yard number. At the point of adding this, the only spot where this happened + # was in the variable drive_start_yard_line in the games + # "2000_01_CAR_WAS", "2000_02_NE_NYJ", and "2000_03_ATL_CAR" + dplyr::mutate_at( + .vars = c("yardline", "drive_start_yard_line" ,"drive_end_yard_line"), + .funs = ~ dplyr::case_when( + stringr::str_detect(.x, "[:upper:]{2,3}(?=[:digit:]{1,2})") ~ + stringr::str_c(stringr::str_extract(.x, "[:upper:]{2,3}"), stringr::str_extract(.x, "[:digit:]{1,2}"), sep = " "), + TRUE ~ .x + ) + ) %>% # Group by the game_half to then create cumulative timeouts used for both # the home and away teams: dplyr::group_by(.data$game_id, .data$game_half) %>% diff --git a/tests/testthat/expected_pbp.rds b/tests/testthat/expected_pbp.rds index d1f632ce..067ee99d 100644 Binary files a/tests/testthat/expected_pbp.rds and b/tests/testthat/expected_pbp.rds differ