NFC East watch after Week 11 2020
Slight increase in expected division winner record
Procedure
The same procedure is implemented as last week.
- Number of simulations has been increased to 100,000.
- Tiebreakers have been coded up, so who actually wins can be calculated each time.
# load division alignment and results table up thru Week 11
food <- readxl::read_xlsx(here::here("static", "data", "NFL.xlsx"), "Results")
food <- dplyr::filter(food, !.data$isTie)
food <- dplyr::select(food, .data$Week, .data$Winner, .data$Loser)
food <- dplyr::group_by(food, .data$Week)
food <- dplyr::mutate(food, game = dplyr::row_number())
food <- tidyr::gather(food, "key", "val", .data$Winner, .data$Loser)
food <- dplyr::group_by(food, .data$Week, .data$game, .data$key)
food <- dplyr::mutate(food, val = stringr::str_remove(.data$val, ".+ "))
food <- tidyr::spread(food, .data$key, .data$val)
whom <- readxl::read_xlsx(here::here("static", "data", "NFL.xlsx"), "Alignment")
# load ratings and schedule
ELO <- readxl::read_xlsx(here::here("static", "data", "NFL.xlsx"), "ELO_11")
# ELO <- dplyr::select(ELO, -.data$W, -.data$L, -.data$`T`, -.data$Diff)
ELO <- dplyr::mutate(ELO, bye = 0)
gms <- readxl::read_xlsx(here::here("static", "data", "NFL.xlsx"), "Schedule")
gms <- dplyr::select(gms, -.data$Date, -.data$Time)
gms <- dplyr::filter(gms, .data$Week > 11)
gms <- dplyr::group_by(gms, .data$Week)
gms <- dplyr::mutate(gms, game = dplyr::row_number())
gms <- tidyr::gather(gms, "side", "name", .data$Home, .data$Visitor)
gms <- tidyr::nest(gms)
# a function to simulate a week of a season
foo <- function(elo, sch, res = dplyr::tibble()) {
sch <- dplyr::left_join(elo, sch)
sch <- dplyr::group_by(sch, .data$game)
sch <- dplyr::arrange(sch, .data$game, .data$side)
# there is a 33-point bonus for HFA and a 25-pt bonus for coming off a bye
sch <- dplyr::mutate(sch, emf = .data$ELO + 33 * (.data$side == "Home") + 25 * .data$bye)
win <- dplyr::filter(sch, !is.na(.data$game))
win <- dplyr::summarise(win,
edf = purrr::reduce(.data$emf, `-`), # difference in Elo after bonuses
exp = 10^(.data$edf / 400), # expected home wins per visitor win
win = sample(.data$side, 1, prob = c(.data$exp, 1)) # randomly draw a winner
)
sch <- dplyr::left_join(sch, win)
win <- dplyr::mutate(sch, win = ifelse(.data$side==.data$win, "Winner", "Loser"))
win <- dplyr::select(win, .data$name, .data$win)
win <- dplyr::filter(win, !is.na(.data$win))
win <- tidyr::spread(win, .data$win, .data$name)
win <- dplyr::group_by(win)
win <- dplyr::select(win, -.data$game)
sch <- dplyr::mutate(sch,
exp = ifelse(.data$side == "Home", .data$exp, 1), # expected wins per visitor win
exp = .data$exp / sum(.data$exp), # expected wins per game
win = as.numeric(.data$win == .data$side), # realized wins
del = 20 * (.data$win - .data$exp), # calculate Elo change
del = ifelse(is.na(.data$del), 0, .data$del), # set to 0 for teams on a bye
ELO = .data$ELO + .data$del, # add up
w = .data$w + ifelse(is.na(.data$win), 0, .data$win),
l = .data$l + ifelse(is.na(.data$win), 0, (1 - .data$win)),
bye = as.numeric(is.na(.data$game)) # set bye flag
)
sch <- dplyr::group_by(sch)
list(elo = dplyr::select(sch, .data$ELO:.data$bye),
res = dplyr::bind_rows(res, win))
}
v <- here::here("static", "data", "nfl", "11")
# recursive function to simulate a multiple-week season
bar <- function(SCH, elo, res = dplyr::tibble(), foo = foo, bar = bar) {
if(length(SCH) < 1) {
yoink <- list(elo = elo, res = res)
# u <- paste0(u, "/", j, "_", i, ".rds")
# saveRDS(yoink, u)
# return(u)
return(yoink)
}
baz <- foo(elo, dplyr::first(SCH), res)
bar(SCH[-1], baz$elo, baz$res, foo, bar)
}
cl <- parallel::makePSOCKcluster(parallel::detectCores())
doParallel::registerDoParallel(cl)
# sims <- dplyr::tibble(j = 1:16)
# run ten thousand simulations
soms <- dplyr::tibble(i = 1:6250)
soms <- dplyr::group_by(soms, .data$i)
system.time(sims <- dplyr::tibble(thing = plyr::llply(1:16, function(j, soms, SCH, ELO, bar, foo) {
# thing = lapply(.data$j, function(j, soms, SCH, ELO) {
list(dplyr::mutate(soms, filename = list(bar(SCH, ELO, foo = foo, bar = bar))))
}, soms = soms, SCH = gms$data, ELO = ELO, bar = bar, foo = foo, .parallel = TRUE)))
parallel::stopCluster(cl)
# save simulations
saveRDS(sims, here::here("static", "data", "nfl_sims_11_20.rds"))
# reload saved simulations instead of computing them every time I save the post
sims <- readRDS(here::here("static", "data", "nfl_sims_11_20.rds"))
sims <- dplyr::mutate(sims, j = dplyr::row_number())
sims <- tidyr::unnest(sims, .data$thing)
sims <- tidyr::unnest(sims, .data$thing)
sims <- dplyr::group_by(sims, .data$j, .data$i)
sims <- dplyr::mutate(sims, filename = list(lapply(dplyr::first(.data$filename), list))) # wow I left this in a janky state in the last step
sims <- dplyr::group_modify(sims, function(.x, .y) {dplyr::as_tibble(dplyr::first(.x$filename))})
# function to filter NFC East winner, including first four tiebreakers
boop <- function(rec, res) {
roc <- dplyr::left_join(whom, rec)
roc <- dplyr::filter(roc, .data$Conference == "NFC", .data$Division == "East")
roc <- dplyr::mutate(roc, t = ifelse(is.na(.data$t), 0, .data$t))
roc <- dplyr::mutate(roc, pct = .data$w + .data$t / 2)
# roc <- dplyr::filter(roc, .data$pct == min(.data$pct)) # NO
roc <- dplyr::filter(roc, .data$pct == max(.data$pct))
if(nrow(roc) == 1) {return(roc)}
rys <- dplyr::bind_rows(food, res)
rys <- dplyr::group_by(rys)
rys <- dplyr::select(rys, .data$Loser, .data$Winner)
rys <- dplyr::mutate(rys, num = dplyr::row_number())
# head-to-head
ryc <- dplyr::left_join(dplyr::rename(roc, Winner = .data$name), rys)
ryc <- dplyr::left_join(dplyr::select(roc, Loser = .data$name), ryc)
ryc <- dplyr::rename(ryc, name = .data$Winner)
ryc <- dplyr::group_by(ryc, .data$name)
ryc <- dplyr::summarise(ryc, HTH = dplyr::n())
ryc <- dplyr::filter(ryc, .data$HTH == max(.data$HTH))
roc <- dplyr::left_join(ryc, roc)
if(nrow(roc) == 1) {return(roc)}
# division
ryc <- dplyr::left_join(dplyr::rename(roc, Winner = .data$name), rys)
wha <- dplyr::rename(whom, Loser = .data$name)
ryc <- dplyr::left_join(wha, ryc)
ryc <- dplyr::filter(ryc, !is.na(.data$Winner))
ryc <- dplyr::rename(ryc, name = .data$Winner)
ryc <- dplyr::group_by(ryc, .data$name)
ryc <- dplyr::summarise(ryc, div = dplyr::n())
ryc <- dplyr::filter(ryc, .data$div == max(.data$div))
roc <- dplyr::left_join(ryc, roc)
if(nrow(roc) == 1) {return(roc)}
# common?
rus <- tidyr::gather(rys, "key", "name", .data$Loser, .data$Winner)
rus <- dplyr::left_join(roc, rus)
rus <- dplyr::left_join(rus, rys)
rus <- tidyr::gather(rus, "yek", "versus", .data$Loser, .data$Winner)
rus <- dplyr::filter(rus, .data$key != .data$yek)
rus <- dplyr::select(rus, .data$num, .data$name, .data$versus)
rus <- dplyr::group_by(rus, .data$versus, .data$name)
rus <- dplyr::summarise(rus, n = dplyr::n())
rus <- tidyr::spread(rus, .data$name, .data$n, fill = 0)
rus <- dplyr::filter(rus, dplyr::across(tidyselect::everything(), `!=`, y = 0))
rus <- dplyr::summarise(rus)
rus <- dplyr::rename(rus, Loser = .data$versus)
rus <- dplyr::left_join(rus, rys)
ryc <- dplyr::left_join(dplyr::rename(roc, Winner = .data$name), rus)
ryc <- dplyr::rename(ryc, name = .data$Winner)
ryc <- dplyr::group_by(ryc, .data$name)
ryc <- dplyr::summarise(ryc, com = dplyr::n())
ryc <- dplyr::filter(ryc, .data$com == max(.data$com))
roc <- dplyr::left_join(ryc, roc)
if(nrow(roc) == 1) {return(roc)}
# conference
ryc <- dplyr::left_join(dplyr::rename(roc, Winner = .data$name), rys)
wha <- dplyr::rename(whom, Loser = .data$name)
wha <- dplyr::select(wha, -.data$Division)
ryc <- dplyr::left_join(wha, ryc)
ryc <- dplyr::filter(ryc, !is.na(.data$Winner))
ryc <- dplyr::rename(ryc, name = .data$Winner)
ryc <- dplyr::group_by(ryc, .data$name)
ryc <- dplyr::summarise(ryc, cnf = dplyr::n())
ryc <- dplyr::filter(ryc, .data$cnf == max(.data$cnf))
roc <- dplyr::left_join(ryc, roc)
dplyr::sample_n(roc, 1)
}
# do filtering
system.time(sums <- dplyr::group_modify(sims, function(.x, .y) {
pkgcond::suppress_messages(boop(dplyr::first(.x$elo), dplyr::first(.x$res)))
}))
saveRDS(sums, here::here("static", "data", "nfc_east_winners_11_20.rds"))
Results
Let’s make the same table from last week.
sums <- readRDS(here::here("static", "data", "nfc_east_winners_11_20.rds"))
sems <- dplyr::mutate(sums, wlt = paste(.data$w, .data$l, .data$t, sep = "-"))
sems <- dplyr::group_by(sems, .data$pct, .data$wlt)
sems <- dplyr::summarise(sems)
## `summarise()` regrouping output by 'pct' (override with `.groups` argument)
sems <- dplyr::group_by(sems)
sems <- dplyr::mutate(sems, wlt = factor(.data$pct, levels = .data$pct, labels = .data$wlt))
sums <- dplyr::left_join(sums, sems)
## Joining, by = c("pct", "wlt")
sams <- table(sums$wlt)
cumsum(sams)
## 4-11-1 5-11-0 5-10-1 6-10-0 6-9-1 7-9-0 7-8-1 8-8-0 8-7-1 9-7-0 9-6-1
## 35 2200 7211 29653 46398 73124 85055 94683 98368 99576 100000
Since last week:
- Chances of a <= 6-win East champ have declined a little, to 46.4%.
- Chances of a >= 10-loss East champ have held steady.
- Chances of a <= 4-win East champ have nearly evaporated.
- Chances of a >= 10-win champ appear gone.
Here’s a graphical breakdown by team and record:
graf <- dplyr::group_by(sums, .data$name, .data$wlt)
graf <- dplyr::summarise(graf, n = dplyr::n())
## `summarise()` regrouping output by 'name' (override with `.groups` argument)
graf <- dplyr::group_by(graf)
graf <- dplyr::mutate(graf, p = .data$n / sum(.data$n))
flop <- cbind(L = 60, u = c(48, -28, -28, 9), v = c(9, 28, -28, 48))
flop <- grDevices::convertColor(flop, "Luv", "sRGB")
flop <- apply(flop, 1, as.list)
flop <- lapply(flop, do.call, what = rgb)
ggplot2::ggplot(graf, ggplot2::aes(x = .data$name,
color = factor(.data$name, levels = c("Giants", "Eagles", "Cowboys", "Team")),
size = .data$p,
y = .data$wlt)) +
ggplot2::scale_color_manual(guide = FALSE, values = flop) +
ggplot2::scale_size(range = c(0, 25)) +
ggplot2::geom_point()