Skip to content

Commit

Permalink
Merge pull request #427 from nflverse/posteam-fixes
Browse files Browse the repository at this point in the history
fix posteam for 2023+
  • Loading branch information
mrcaseb authored Sep 11, 2023
2 parents ff8a7b5 + ed4872e commit e8b5787
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: nflfastR
Title: Functions to Efficiently Access NFL Play by Play Data
Version: 4.5.1.9011
Version: 4.5.1.9012
Authors@R:
c(person(given = "Sebastian",
family = "Carl",
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
- Added the new function `missing_raw_pbp()` that computes a vector of game IDs missing in the local raw play-by-play directory. (#423)
- Decode player IDs in 2023 pbp. (#425)
- Drop the pseudo plays TV Timeout and Two-Minute Warning. (#426)

- Fix posteam on kickoffs and PATs following a defensive TD in 2023+ pbp. (#427)

# nflfastR 4.5.1

Expand Down
31 changes: 30 additions & 1 deletion R/helper_scrape_nfl.R
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@ get_pbp_nfl <- function(id,
# These mess up some of our logic. Since they are useless, we remove them here
dplyr::filter(
!(is.na(.data$timeout_team) & stringr::str_detect(tolower(.data$play_description), "timeout at|two-minute"))
)
) %>%
fix_posteams()

# fix for games where home_team == away_team and fields are messed up
if (bad_game == 1) {
Expand Down Expand Up @@ -372,5 +373,33 @@ fix_bad_games <- function(pbp) {

}

# Repair the `posteam` column of 2023+ play-by-play data.
#
# `pbp` is a data frame of plays. If it contains at least one row with
# `season >= 2023` and a `pre_play_by_play` column, `posteam` is replaced by
# the team abbreviation parsed from the start of `pre_play_by_play`; otherwise
# `pbp` is returned unchanged. Rows where no abbreviation can be parsed, and
# rows whose `play_description` is exactly "GAME", keep their original
# `posteam`. Returns the (possibly modified) data frame.
fix_posteams <- function(pbp) {
  # 2023 pbp introduces two new problems
  # 1. Definition of posteam on kick offs changed to receiving team. That's our
  #    definition and we swap teams later.
  # 2. Posteam doesn't change on the PAT after defensive TD
  #
  # We adjust both things here, but only for 2023 and later to avoid backwards
  # compatibility problems.
  # We need the variable pre_play_by_play which usually looks like
  # "KC 1-10 NYJ 40"
  #
  # na.rm = TRUE keeps the condition a plain TRUE/FALSE even if `season`
  # contains NA (an NA condition would make `if` error).
  needs_fix <- any(pbp$season >= 2023, na.rm = TRUE) &&
    ("pre_play_by_play" %in% names(pbp))

  if (needs_fix) {
    # Let's be as explicit as possible about what we want to extract from the
    # string. It's really only the first valid team abbreviation followed by a
    # blank space.
    valid_team_abbrs <- paste(
      nflfastR::teams_colors_logos$team_abbr,
      collapse = "|"
    )
    # The alternation MUST be grouped. Without the group, "^" binds only to
    # the first abbreviation and the lookahead only to the last one, so any
    # other abbreviation could match anywhere in the string, and a shorter
    # abbreviation (e.g. "LA") could match the beginning of a longer one
    # (e.g. "LAC").
    posteam_regex <- paste0("^(?:", valid_team_abbrs, ")(?=[:space:])")

    pbp <- dplyr::mutate(
      pbp,
      parsed_posteam = stringr::str_extract(
        .data$pre_play_by_play,
        posteam_regex
      ),
      posteam = dplyr::case_when(
        # nothing parseable -> keep what we have
        is.na(.data$parsed_posteam) ~ .data$posteam,
        # rows described as "GAME" keep their original posteam
        .data$play_description == "GAME" ~ .data$posteam,
        TRUE ~ .data$parsed_posteam
      ),
      # drop helper
      parsed_posteam = NULL
    )
  }

  pbp
}

0 comments on commit e8b5787

Please sign in to comment.