NFC East watch after Week 11 2020

Slight increase in expected division winner record

Procedure

The same procedure is implemented as last week.

  • Number of simulations has been increased to 100,000.
  • Tiebreakers have been coded up, so who actually wins can be calculated each time.
# load division alignment and results table up thru Week 11
food <- readxl::read_xlsx(here::here("static", "data", "NFL.xlsx"), "Results")
food <- dplyr::filter(food, !.data$isTie)
food <- dplyr::select(food, .data$Week, .data$Winner, .data$Loser)
food <- dplyr::group_by(food, .data$Week)
food <- dplyr::mutate(food, game = dplyr::row_number())
food <- tidyr::gather(food, "key", "val", .data$Winner, .data$Loser)
food <- dplyr::group_by(food, .data$Week, .data$game, .data$key)
food <- dplyr::mutate(food, val = stringr::str_remove(.data$val, ".+ "))
food <- tidyr::spread(food, .data$key, .data$val)

whom <- readxl::read_xlsx(here::here("static", "data", "NFL.xlsx"), "Alignment")

# load ratings and schedule
ELO <- readxl::read_xlsx(here::here("static", "data", "NFL.xlsx"), "ELO_11")
# ELO <- dplyr::select(ELO, -.data$W, -.data$L, -.data$`T`, -.data$Diff)
ELO <- dplyr::mutate(ELO, bye = 0)

gms <- readxl::read_xlsx(here::here("static", "data", "NFL.xlsx"), "Schedule")
gms <- dplyr::select(gms, -.data$Date, -.data$Time)
gms <- dplyr::filter(gms, .data$Week > 11)

gms <- dplyr::group_by(gms, .data$Week)
gms <- dplyr::mutate(gms, game = dplyr::row_number())
gms <- tidyr::gather(gms, "side", "name", .data$Home, .data$Visitor)
gms <- tidyr::nest(gms)

# a function to simulate a week of a season
foo <- function(elo, sch, res = dplyr::tibble()) {
  sch <- dplyr::left_join(elo, sch)
  sch <- dplyr::group_by(sch, .data$game)
  sch <- dplyr::arrange(sch, .data$game, .data$side)
  
  # there is a 33-point bonus for HFA and a 25-pt bonus for coming off a bye
  sch <- dplyr::mutate(sch, emf = .data$ELO + 33 * (.data$side == "Home") + 25 * .data$bye)
  
  win <- dplyr::filter(sch, !is.na(.data$game))
  win <- dplyr::summarise(win, 
    edf = purrr::reduce(.data$emf, `-`),                # difference in Elo after bonuses
    exp = 10^(.data$edf / 400),                         # expected home wins per visitor win
    win = sample(.data$side, 1, prob = c(.data$exp, 1)) # randomly draw a winner
  )
  
  sch <- dplyr::left_join(sch, win)
  
  win <- dplyr::mutate(sch, win = ifelse(.data$side==.data$win, "Winner", "Loser"))
  win <- dplyr::select(win, .data$name, .data$win)
  win <- dplyr::filter(win, !is.na(.data$win))
  win <- tidyr::spread(win, .data$win, .data$name)
  win <- dplyr::group_by(win)
  win <- dplyr::select(win, -.data$game)
  
  sch <- dplyr::mutate(sch, 
          exp = ifelse(.data$side == "Home", .data$exp, 1), # expected wins per visitor win
          exp = .data$exp / sum(.data$exp),                 # expected wins per game
          win = as.numeric(.data$win == .data$side),        # realized wins
          del = 20 * (.data$win - .data$exp),               # calculate Elo change
          del = ifelse(is.na(.data$del), 0, .data$del),     # set to 0 for teams on a bye
          ELO = .data$ELO + .data$del,                      # add up
          w   = .data$w   + ifelse(is.na(.data$win), 0, .data$win),
          l   = .data$l   + ifelse(is.na(.data$win), 0, (1 - .data$win)),
          bye = as.numeric(is.na(.data$game))               # set bye flag
        )
  
  sch <- dplyr::group_by(sch)
  
  list(elo = dplyr::select(sch, .data$ELO:.data$bye),
       res = dplyr::bind_rows(res, win))
}

v <- here::here("static", "data", "nfl", "11")

# recursive function to simulate a multiple-week season
bar <- function(SCH, elo, res = dplyr::tibble(), foo = foo, bar = bar) {
  if(length(SCH) < 1) {
    yoink <- list(elo = elo, res = res)
    
    # u <- paste0(u, "/", j, "_", i, ".rds")
    
    # saveRDS(yoink, u)
    
    # return(u)
    
    return(yoink)
  }
  
  baz <- foo(elo, dplyr::first(SCH), res)
  
  bar(SCH[-1], baz$elo, baz$res, foo, bar)
}

cl <- parallel::makePSOCKcluster(parallel::detectCores())
doParallel::registerDoParallel(cl)

# sims <- dplyr::tibble(j = 1:16)
# run ten thousand simulations
soms <- dplyr::tibble(i = 1:6250)
soms <- dplyr::group_by(soms, .data$i)

system.time(sims <- dplyr::tibble(thing = plyr::llply(1:16, function(j, soms, SCH, ELO, bar, foo) {
                                  # thing = lapply(.data$j, function(j, soms, SCH, ELO) {
  list(dplyr::mutate(soms, filename = list(bar(SCH, ELO, foo = foo, bar = bar))))
}, soms = soms, SCH = gms$data, ELO = ELO, bar = bar, foo = foo, .parallel = TRUE)))

parallel::stopCluster(cl)

# save simulations
saveRDS(sims, here::here("static", "data", "nfl_sims_11_20.rds"))
# reload saved simulations instead of computing them every time I save the post
sims <- readRDS(here::here("static", "data", "nfl_sims_11_20.rds"))
sims <- dplyr::mutate(sims, j = dplyr::row_number())
sims <- tidyr::unnest(sims, .data$thing)
sims <- tidyr::unnest(sims, .data$thing)
sims <- dplyr::group_by(sims, .data$j, .data$i)
sims <- dplyr::mutate(sims, filename = list(lapply(dplyr::first(.data$filename), list))) # wow I left this in a janky state in the last step
sims <- dplyr::group_modify(sims, function(.x, .y) {dplyr::as_tibble(dplyr::first(.x$filename))})

# function to filter NFC East winner, including first four tiebreakers
boop <- function(rec, res) {
  roc <- dplyr::left_join(whom, rec)
  roc <- dplyr::filter(roc, .data$Conference == "NFC", .data$Division == "East")
  roc <- dplyr::mutate(roc, t = ifelse(is.na(.data$t), 0, .data$t))
  roc <- dplyr::mutate(roc, pct = .data$w + .data$t / 2)
  
  # roc <- dplyr::filter(roc, .data$pct == min(.data$pct)) # NO
  roc <- dplyr::filter(roc, .data$pct == max(.data$pct))
  
  if(nrow(roc) == 1) {return(roc)}
  
  rys <- dplyr::bind_rows(food, res)
  rys <- dplyr::group_by(rys)
  rys <- dplyr::select(rys, .data$Loser, .data$Winner)
  rys <- dplyr::mutate(rys, num = dplyr::row_number())
    
  # head-to-head
  ryc <- dplyr::left_join(dplyr::rename(roc, Winner = .data$name), rys)
  ryc <- dplyr::left_join(dplyr::select(roc, Loser  = .data$name), ryc)
  ryc <- dplyr::rename(ryc, name = .data$Winner)
  ryc <- dplyr::group_by(ryc, .data$name)
  ryc <- dplyr::summarise(ryc, HTH = dplyr::n())
  ryc <- dplyr::filter(ryc, .data$HTH == max(.data$HTH))
  
  roc <- dplyr::left_join(ryc, roc)
  
  if(nrow(roc) == 1) {return(roc)}
  
  # division
  ryc <- dplyr::left_join(dplyr::rename(roc, Winner = .data$name), rys)
  wha <- dplyr::rename(whom, Loser = .data$name)
  ryc <- dplyr::left_join(wha, ryc)
  ryc <- dplyr::filter(ryc, !is.na(.data$Winner))
  ryc <- dplyr::rename(ryc, name = .data$Winner)
  ryc <- dplyr::group_by(ryc, .data$name)
  ryc <- dplyr::summarise(ryc, div = dplyr::n())
  ryc <- dplyr::filter(ryc, .data$div == max(.data$div))
  
  roc <- dplyr::left_join(ryc, roc)
  
  if(nrow(roc) == 1) {return(roc)}
  
  # common?
  rus <- tidyr::gather(rys, "key", "name", .data$Loser, .data$Winner)
  rus <- dplyr::left_join(roc, rus)
  rus <- dplyr::left_join(rus, rys)
  rus <- tidyr::gather(rus, "yek", "versus", .data$Loser, .data$Winner)
  rus <- dplyr::filter(rus, .data$key != .data$yek)
  rus <- dplyr::select(rus, .data$num, .data$name, .data$versus)
  rus <- dplyr::group_by(rus, .data$versus, .data$name)
  rus <- dplyr::summarise(rus, n = dplyr::n())
  rus <- tidyr::spread(rus, .data$name, .data$n, fill = 0)
  rus <- dplyr::filter(rus, dplyr::across(tidyselect::everything(), `!=`, y = 0))
  rus <- dplyr::summarise(rus)
  rus <- dplyr::rename(rus, Loser = .data$versus)
  rus <- dplyr::left_join(rus, rys)
  
  ryc <- dplyr::left_join(dplyr::rename(roc, Winner = .data$name), rus)
  ryc <- dplyr::rename(ryc, name = .data$Winner)
  ryc <- dplyr::group_by(ryc, .data$name)
  ryc <- dplyr::summarise(ryc, com = dplyr::n())
  ryc <- dplyr::filter(ryc, .data$com == max(.data$com))
  
  roc <- dplyr::left_join(ryc, roc)
  
  if(nrow(roc) == 1) {return(roc)}
  
  # conference
  ryc <- dplyr::left_join(dplyr::rename(roc, Winner = .data$name), rys)
  wha <- dplyr::rename(whom, Loser = .data$name)
  wha <- dplyr::select(wha, -.data$Division)
  ryc <- dplyr::left_join(wha, ryc)
  ryc <- dplyr::filter(ryc, !is.na(.data$Winner))
  ryc <- dplyr::rename(ryc, name = .data$Winner)
  ryc <- dplyr::group_by(ryc, .data$name)
  ryc <- dplyr::summarise(ryc, cnf = dplyr::n())
  ryc <- dplyr::filter(ryc, .data$cnf == max(.data$cnf))
  
  roc <- dplyr::left_join(ryc, roc)
  
  dplyr::sample_n(roc, 1)
}

# do filtering
system.time(sums <- dplyr::group_modify(sims, function(.x, .y) {
  pkgcond::suppress_messages(boop(dplyr::first(.x$elo), dplyr::first(.x$res)))
}))

saveRDS(sums, here::here("static", "data", "nfc_east_winners_11_20.rds"))

Results

Let’s make the same table from last week.

sums <- readRDS(here::here("static", "data", "nfc_east_winners_11_20.rds"))

sems <- dplyr::mutate(sums, wlt = paste(.data$w, .data$l, .data$t, sep = "-"))
sems <- dplyr::group_by(sems, .data$pct, .data$wlt)
sems <- dplyr::summarise(sems)
## `summarise()` regrouping output by 'pct' (override with `.groups` argument)
sems <- dplyr::group_by(sems)
sems <- dplyr::mutate(sems, wlt = factor(.data$pct, levels = .data$pct, labels = .data$wlt))
sums <- dplyr::left_join(sums, sems)
## Joining, by = c("pct", "wlt")
sams <- table(sums$wlt)
cumsum(sams)
## 4-11-1 5-11-0 5-10-1 6-10-0  6-9-1  7-9-0  7-8-1  8-8-0  8-7-1  9-7-0  9-6-1 
##     35   2200   7211  29653  46398  73124  85055  94683  98368  99576 100000

Since last week:

  • Chances of a <= 6-win East champ have declined a little, to 46.4%.
  • Chances of a >= 10-loss East champ have held steady.
  • Chances of a <= 4-win East champ have nearly evaporated.
  • Chances of a >= 10-win champ appear gone.

Here’s a graphical breakdown by team and record:

graf <- dplyr::group_by(sums, .data$name, .data$wlt)
graf <- dplyr::summarise(graf, n = dplyr::n())
## `summarise()` regrouping output by 'name' (override with `.groups` argument)
graf <- dplyr::group_by(graf)
graf <- dplyr::mutate(graf, p = .data$n / sum(.data$n))

flop <- cbind(L = 60, u = c(48, -28, -28, 9), v = c(9, 28, -28, 48))
flop <- grDevices::convertColor(flop, "Luv", "sRGB")
flop <- apply(flop, 1, as.list)
flop <- lapply(flop, do.call, what = rgb)

ggplot2::ggplot(graf, ggplot2::aes(x      = .data$name,
                                   color  = factor(.data$name, levels = c("Giants", "Eagles", "Cowboys", "Team")),
                                   size   = .data$p,
                                   y      = .data$wlt)) +
  ggplot2::scale_color_manual(guide = FALSE, values = flop) + 
  ggplot2::scale_size(range = c(0, 25)) +
  ggplot2::geom_point()

Gordon Arsenoff
Senior Research Specialist

Bayesian. He/him.