Skip to content

Commit

Permalink
Push nflfastR v2.1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
mrcaseb committed Jul 3, 2020
1 parent 59bf0d4 commit 496f647
Show file tree
Hide file tree
Showing 35 changed files with 1,867 additions and 1,569 deletions.
5 changes: 5 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@
^vignettes/nflfastR-models\.Rmd$
^vignettes$
^\.travis\.yml$
^man/figures/card\.png$
^man/figures/header_github\.png$
^man/figures/header_twitter\.png$
^man/figures/logo\.png$
^man/figures/nflfastR_logo_fillsize\.png$
7 changes: 5 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: nflfastR
Title: Functions to Efficiently Scrape NFL Play by Play Data
Version: 2.0.6
Version: 2.1.0
Authors@R:
c(person(given = "Sebastian",
family = "Carl",
Expand Down Expand Up @@ -45,17 +45,20 @@ Imports:
progressr,
purrr,
stats,
rlang,
stringr,
tibble,
tidyr,
tidyselect,
xgboost (>= 1.1)
Suggests:
DBI,
furrr,
future,
rmarkdown,
RSQLite,
knitr
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.0
RoxygenNote: 7.1.1
VignetteBuilder: knitr
40 changes: 40 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,54 @@

export(add_qb_epa)
export(calculate_expected_points)
export(calculate_win_probability)
export(clean_pbp)
export(fast_scraper)
export(fast_scraper_schedules)
export(update_db)
import(dplyr)
importFrom(dplyr,bind_cols)
importFrom(dplyr,filter)
importFrom(dplyr,first)
importFrom(dplyr,group_by)
importFrom(dplyr,if_else)
importFrom(dplyr,mutate)
importFrom(dplyr,rename)
importFrom(dplyr,select)
importFrom(dplyr,ungroup)
importFrom(glue,glue)
importFrom(httr,GET)
importFrom(httr,HEAD)
importFrom(httr,content)
importFrom(janitor,clean_names)
importFrom(jsonlite,fromJSON)
importFrom(lubridate,ms)
importFrom(lubridate,period_to_seconds)
importFrom(magrittr,"%>%")
importFrom(mgcv,predict.bam)
importFrom(purrr,map_chr)
importFrom(purrr,map_df)
importFrom(purrr,map_dfr)
importFrom(purrr,modify_at)
importFrom(purrr,modify_if)
importFrom(purrr,pluck)
importFrom(purrr,prepend)
importFrom(purrr,set_names)
importFrom(rlang,.data)
importFrom(stats,na.omit)
importFrom(stats,predict)
importFrom(stringr,str_detect)
importFrom(stringr,str_extract)
importFrom(stringr,str_remove_all)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_split)
importFrom(stringr,str_sub)
importFrom(stringr,str_trim)
importFrom(tibble,as_tibble)
importFrom(tibble,as_tibble_row)
importFrom(tibble,tibble)
importFrom(tidyr,replace_na)
importFrom(tidyr,unnest)
importFrom(tidyr,unnest_wider)
importFrom(tidyselect,matches)
importFrom(tidyselect,one_of)
30 changes: 30 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,33 @@
# nflfastR 2.1.0

### Models

* Removed `week` from Expected Points models along with an update of
`vignette("nflfastR-models")` and `vignette("examples")`

### Functions

* Added function `update_db()` which adds all completed games to a SQLite database
* Added function `calculate_win_probability()`
* Added new examples to `vignette("examples")` demonstrating the usage of the
above mentioned functions

### Bugs

* Fixed a problem with inconsistent data types of the variable
`drive_real_start_time` pre and post 2011
* Fixed a problem where some `game_id`s were overwritten during the play by play parsing
* Fixed some more WP bugs on kickoffs with penalties and rare play descriptions

### Miscellaneous

* `fast_scraper()` now loads the raw game data from a separate raw data repo
* Completely overhauled the entire code base to directly implement
[tidy evaluation](https://dplyr.tidyverse.org/articles/programming.html) using
`.data` from the [rlang](https://rlang.r-lib.org/) package (this is a major
code change that takes some getting used to but we need it in preparation of
a future release)

# nflfastR 2.0.6

* Fixed a problem where defensive two point conversions were not counted
Expand Down
79 changes: 0 additions & 79 deletions R/ep_calculator.R

This file was deleted.

154 changes: 154 additions & 0 deletions R/ep_wp_calculators.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#' Compute expected points
#'
#' Computes expected points for the provided plays and returns the data with
#' the probability of each scoring event and EP added. See Details for the
#' columns that must be present in the input.
#'
#' @param pbp_data Play-by-play dataset to estimate expected points for.
#' @details Computes expected points for provided plays. Returns the data with
#' probabilities of each scoring event and EP added. The following columns
#' must be present:
#' \itemize{
#' \item{season}
#' \item{home_team}
#' \item{posteam}
#' \item{roof (coded as 'outdoors', 'dome', or {'open' / 'closed' / NA} (retractable))}
#' \item{half_seconds_remaining}
#' \item{yardline_100}
#' \item{down}
#' \item{ydstogo}
#' \item{posteam_timeouts_remaining}
#' \item{defteam_timeouts_remaining}
#' }
#' Columns in pbp_data that carry the same names as the output columns listed
#' below are dropped and recomputed, so the function can safely be run on data
#' that already contains expected points.
#' @return The original pbp_data with the following columns added (the new
#' columns are placed in front of the original ones):
#' \describe{
#' \item{ep}{expected points.}
#' \item{no_score_prob}{probability of no more scoring this half.}
#' \item{opp_fg_prob}{probability next score opponent field goal this half.}
#' \item{opp_safety_prob}{probability next score opponent safety this half.}
#' \item{opp_td_prob}{probability of next score opponent touchdown this half.}
#' \item{fg_prob}{probability next score field goal this half.}
#' \item{safety_prob}{probability next score safety this half.}
#' \item{td_prob}{probability next score touchdown this half.}
#' }
#' @importFrom rlang .data
#' @importFrom dplyr select mutate bind_cols
#' @importFrom tidyselect one_of
#' @importFrom stats predict
#' @export
calculate_expected_points <- function(pbp_data) {
  # Remove EP columns left over from an earlier run from the data itself, not
  # only from the model input. Otherwise the bind_cols() call at the end would
  # return the stale columns next to the freshly computed ones.
  # one_of() warns about names that are not present, hence suppressWarnings().
  suppressWarnings({
    pbp_data <- pbp_data %>%
      dplyr::select(-one_of(drop.cols))

    model_data <- pbp_data %>%
      make_model_mutations() %>%
      ep_model_select()
  })

  # The predictions are reshaped into seven columns, filled row by row, so
  # that every row of preds holds the seven outcome probabilities of one play.
  preds <- as.data.frame(
    matrix(stats::predict(ep_model, as.matrix(model_data)), ncol = 7, byrow = TRUE)
  )

  colnames(preds) <- c(
    "td_prob", "opp_td_prob", "fg_prob", "opp_fg_prob",
    "safety_prob", "opp_safety_prob", "no_score_prob"
  )

  # EP is the probability-weighted point value of the next score; opponent
  # scores count negatively and "no score" contributes nothing.
  preds <- preds %>%
    dplyr::mutate(
      ep =
        (-3 * .data$opp_fg_prob) +
          (-2 * .data$opp_safety_prob) +
          (-7 * .data$opp_td_prob) +
          (3 * .data$fg_prob) +
          (2 * .data$safety_prob) +
          (7 * .data$td_prob)
    ) %>%
    dplyr::bind_cols(pbp_data)

  return(preds)
}

# Output columns of calculate_expected_points(). They are removed from the
# incoming data inside that function before new values are computed.
drop.cols <- c(
  "ep",
  "td_prob",
  "opp_td_prob",
  "fg_prob",
  "opp_fg_prob",
  "safety_prob",
  "opp_safety_prob",
  "no_score_prob"
)


#' Compute win probability
#'
#' Computes win probability for the provided plays and returns the data with
#' the win probability columns added. See Details for the columns that must be
#' present in the input.
#'
#' @param pbp_data Play-by-play dataset to estimate win probability for.
#' @details Computes win probability for provided plays. Returns the data with
#' the win probability (with and without the pre-game spread) added. The
#' following columns must be present:
#' \itemize{
#' \item{receive_2h_ko (1 if game is in 1st half and possession team will receive 2nd half kickoff, 0 otherwise)}
#' \item{ep (expected points)}
#' \item{score_differential}
#' \item{home_team}
#' \item{posteam}
#' \item{half_seconds_remaining}
#' \item{game_seconds_remaining}
#' \item{spread_line (how many points home team was favored by)}
#' \item{down}
#' \item{ydstogo}
#' \item{posteam_timeouts_remaining}
#' \item{defteam_timeouts_remaining}
#' }
#' Columns named wp or vegas_wp that already exist in pbp_data are dropped and
#' recomputed, so the function can safely be run on data that already contains
#' win probabilities.
#' @return The original pbp_data with the following columns appended to it:
#' \describe{
#' \item{wp}{win probability.}
#' \item{vegas_wp}{win probability taking into account pre-game spread.}
#' }
#' @importFrom rlang .data
#' @importFrom dplyr select mutate if_else rename bind_cols
#' @importFrom tidyselect one_of
#' @importFrom stats predict
#' @importFrom tibble as_tibble
#' @export
calculate_win_probability <- function(pbp_data) {
  # Remove wp / vegas_wp left over from an earlier run from the data itself,
  # not only from the model input. Otherwise the bind_cols() call at the end
  # would return the stale columns next to the freshly computed ones. ep is
  # deliberately kept because it is an input of the model features below.
  # one_of() warns about names that are not present, hence suppressWarnings().
  suppressWarnings({
    pbp_data <- pbp_data %>%
      dplyr::select(-one_of(drop.cols.wp))

    model_data <- pbp_data %>%
      dplyr::mutate(
        home = dplyr::if_else(.data$posteam == .data$home_team, 1, 0),
        ExpScoreDiff = .data$ep + .data$score_differential,
        # spread_line is from the home team's point of view; flip the sign
        # when the away team has the ball
        posteam_spread = dplyr::if_else(.data$home == 1, .data$spread_line, -1 * .data$spread_line),
        spread_time = .data$posteam_spread * log(3600 / (50 + (3600 - .data$game_seconds_remaining))),
        # + 1 keeps the denominator non-zero when no game time is left
        ExpScoreDiff_Time_Ratio = .data$ExpScoreDiff / (.data$game_seconds_remaining + 1)
      )
  })

  # After as_tibble() the predictions sit in a column named "value", which is
  # renamed to the public output name.
  wp <- get_preds_wp(model_data) %>%
    tibble::as_tibble() %>%
    dplyr::rename(wp = "value")
  wp_spread <- get_preds_wp_spread(model_data) %>%
    tibble::as_tibble() %>%
    dplyr::rename(vegas_wp = "value")

  preds <- dplyr::bind_cols(
    pbp_data,
    wp,
    wp_spread
  )

  return(preds)
}

# Output columns of calculate_win_probability(). They are removed from the
# incoming data inside that function before new values are computed.
drop.cols.wp <- c("wp", "vegas_wp")
Loading

0 comments on commit 496f647

Please sign in to comment.