We pull in lets-plot with `%use`, which automatically sets up rich output.
Fuel is 'officially supported' but was causing some problems, so we add it by hand. We also need to manually import jsoup and moshi, since they aren't supported by Kotlin-Jupyter.
@file:Repository("https://repo1.maven.org/maven2/")
@file:DependsOn("com.github.kittinunf.fuel:fuel:2.2.3")
@file:DependsOn("com.github.kittinunf.fuel:fuel-coroutines:2.2.3")
@file:DependsOn("org.jsoup:jsoup:1.13.1")
@file:DependsOn("com.squareup.moshi:moshi-kotlin:1.9.3")
@file:DependsOn("de.mpicbg.scicomp:krangl:0.13")
import java.io.File
import kotlinx.coroutines.*
import com.github.kittinunf.result.Result
import com.github.kittinunf.fuel.Fuel
import com.github.kittinunf.fuel.core.FuelManager
import com.github.kittinunf.fuel.coroutines.*
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import com.squareup.moshi.*
%use lets-plot
Note that normally we could simply annotate each class with `@JsonClass` to tell Moshi to auto-generate JSON adapters.
I don't believe that's possible with Kotlin-Jupyter (happy to be wrong about this), so we will build the adapters explicitly instead.
// @JsonClass(generateAdapter = true)
/**
 * One row of a game's scoring summary, with the running score after the play.
 *
 * @property quarter period of the play: 1-4 for regulation, 5 for "OT", 6 for "OT2"
 * @property timeString raw text of the row's time cell, e.g. "9:28"
 * @property secondsElapsed seconds since kickoff, derived from [quarter] and [timeString]
 * @property team text of the row's team cell
 * @property detail text of the row's description cell
 * @property awayscore visiting team's score after the play
 * @property homescore home team's score after the play
 */
data class ScoringPlay(
    val quarter: Int,
    val timeString: String,
    val secondsElapsed: Int,
    val team: String,
    val detail: String,
    val awayscore: Int,
    val homescore: Int
)
// @JsonClass(generateAdapter = true)
/**
 * The box-score links found on one weekly schedule page.
 *
 * @property season season (year) of the schedule page
 * @property weeknumber week number within that season
 * @property pfrURLs `href` values of the game links on the page
 */
data class PFRWeek(
    val season: Int,
    val weeknumber: Int,
    val pfrURLs: List<String>
)
// @JsonClass(generateAdapter = true)
/**
 * One scraped game: who played, the final score and every scoring play.
 *
 * @property season season (year) the game belongs to
 * @property week week number of the game within the season
 * @property pfrURL site-relative link to the game's box score
 * @property hometeam home team's name
 * @property awayteam visiting team's name
 * @property homescore home team's final score
 * @property awayscore visiting team's final score
 * @property scoringplays the game's scoring plays, in page order
 */
data class PFRGame(
    val season: Int,
    val week: Int,
    val pfrURL: String,
    val hometeam: String,
    val awayteam: String,
    val homescore: Int,
    val awayscore: Int,
    val scoringplays: List<ScoringPlay>
)
// @JsonClass(generateAdapter = true)
/**
 * A team's final record for one season.
 *
 * @property season season (year) of the record
 * @property teamname team name as shown in the standings link
 * @property url `href` of the team's standings link
 * @property abbr team abbreviation, taken from the path of [url]
 * @property wins games won
 * @property losses games lost
 * @property ties games tied (0 when the standings cell is blank)
 * @property pointsFor points scored
 * @property pointsAgainst points allowed
 * @property pfrOSRS value of the standings table's `srs_offense` column
 * @property pfrDSRS value of the standings table's `srs_defense` column
 */
data class TeamRecord(
    val season: Int,
    val teamname: String,
    val url: String,
    val abbr: String,
    val wins: Int,
    val losses: Int,
    val ties: Int,
    val pointsFor: Int,
    val pointsAgainst: Int,
    val pfrOSRS: Float,
    val pfrDSRS: Float
)
/**
 * Everything that is persisted to disk: all scraped games plus all season records.
 *
 * @property games every scraped game
 * @property records every scraped team-season record
 */
data class PFRData(
    val games: List<PFRGame>,
    val records: List<TeamRecord>
)
// @JsonClass codegen is not used in this notebook, so the adapters are requested explicitly here.
// KotlinJsonAdapterFactory builds them reflectively at runtime. It is referenced by its
// `kotlin.reflect` package name on purpose: the same-named class in `com.squareup.moshi`
// (which the star import would pick up) is only a deprecated forwarding alias.
val moshi : Moshi = Moshi.Builder()
    .add(com.squareup.moshi.kotlin.reflect.KotlinJsonAdapterFactory())
    .build()

// One adapter per persisted class. Only adapterPFRData is used below for reading and writing the cache file.
val adapterScoringPlay : JsonAdapter<ScoringPlay> = moshi.adapter(ScoringPlay::class.java)
val adapterPFRGame : JsonAdapter<PFRGame> = moshi.adapter(PFRGame::class.java)
val adapterPFRWeek : JsonAdapter<PFRWeek> = moshi.adapter(PFRWeek::class.java)
val adapterTeamRecords : JsonAdapter<TeamRecord> = moshi.adapter(TeamRecord::class.java)
val adapterPFRData : JsonAdapter<PFRData> = moshi.adapter(PFRData::class.java)
`weeks` is just a conduit to get a list of all the game URLs -- we won't save it.
From weeks, we can get games, which we will persist.
We also need team records, which we also persist.
// Scrapes the weekly schedule pages from PFR. The only thing kept from each page is the list of
// links to that week's box scores.
//
// seasonRange: seasons (years) to fetch
// weekRange:   week numbers to fetch within every season (defaults to weeks 1-3)
// returns one PFRWeek per (season, week), seasons first, weeks in order within a season
// throws the underlying Fuel error as soon as any request fails
fun getWeeks(seasonRange : IntRange, weekRange : IntRange = IntRange(1,3)) : List<PFRWeek> =
    seasonRange.flatMap { year ->
        println("season: ${year}")
        weekRange.map { w ->
            println("- week: ${w}")
            val (_, _, result) = Fuel.get("https://www.pro-football-reference.com/years/${year}/week_${w}.htm")
                .responseString()
            val pfrPage : String = when (result) {
                // we don't want to try to continue if there's been an error
                is Result.Failure -> throw result.getException()
                is Result.Success -> result.get()
            }
            val gameLinks : List<String> = Jsoup.parse(pfrPage)
                .select(".game_summaries .game_summary .gamelink a")
                .map { link -> link.attr("href") }
            PFRWeek(season = year, weeknumber = w, pfrURLs = gameLinks)
        }
    }
// Scrapes every box score listed in `weeks` from PFR: the two teams, the final score and all
// scoring plays (so that every in-game point margin can be computed later).
//
// This is the only scraper slow enough to be worth running asynchronously. All games of one week
// are requested concurrently (`async` + `awaitAll`); weeks are processed one after another.
// Each coroutine returns its game instead of appending to a shared list, so the result is in
// input order (week by week, links in page order) and no mutable state is shared between coroutines.
//
// throws the underlying Fuel error as soon as any request fails
suspend fun getGames(weeks : List<PFRWeek>) : List<PFRGame> = coroutineScope {
    weeks.flatMap { week ->
        week.pfrURLs.map { url ->
            async {
                println("Game: season = ${week.season}, week = ${week.weeknumber}, url = ${url}")
                val (_, _, result) = Fuel.get("https://www.pro-football-reference.com${url}")
                    .awaitStringResponseResult()
                val game = when (result) {
                    // we don't want to try to continue if there's been an error
                    is Result.Failure -> throw result.getException()
                    is Result.Success -> parseGame(week, url, result.get())
                }
                println("new game added!")
                game
            }
        }.awaitAll()
    }
}

// Turns the HTML of one box-score page into a PFRGame (helper for getGames).
private fun parseGame(week : PFRWeek, url : String, pfrPage : String) : PFRGame {
    val doc : Document = Jsoup.parse(pfrPage)
    val scoreboxes = doc.select(".scorebox > div")
    val scorerows = doc.select("table#scoring tbody tr")

    // PFR only "announces" the quarter once (not on every row), so we carry the latest value along
    var currentQuarter = 1
    val scores = scorerows.map { r ->
        val quarterLabel = r.select("th[data-stat='quarter']").text().trim()
        currentQuarter = when (quarterLabel) {
            "OT" -> 5
            "OT2" -> 6
            "" -> currentQuarter          // no value on this row: keep the latest quarter
            else -> quarterLabel.toInt()  // a numeric value is the new quarter
        }

        // The clock counts down from 15:00, so elapsed time within the period is 15:00 minus the clock.
        // NOTE(review): overtime rows (quarter 5+) are offset as if they were 15-minute periods too -
        // confirm that is what is wanted for overtime.
        val clock = r.select("td[data-stat='time']").text()
        val clockParts = clock.split(":")
        val secondsElapsed : Int =
            (currentQuarter - 1) * 900 +
            (14 - clockParts[0].toInt()) * 60 +
            (60 - clockParts[1].toInt())

        ScoringPlay(
            quarter = currentQuarter,
            timeString = clock,
            secondsElapsed = secondsElapsed,
            team = r.select("td[data-stat='team']").text(),
            detail = r.select("td[data-stat='description']").text(),
            awayscore = r.select("td[data-stat='vis_team_score']").text().toInt(),
            homescore = r.select("td[data-stat='home_team_score']").text().toInt()
        )
    }

    return PFRGame(
        season = week.season,
        week = week.weeknumber,
        pfrURL = url,
        hometeam = scoreboxes[0].select("strong a").text(),
        awayteam = scoreboxes[1].select("strong a").text(),
        homescore = scoreboxes[0].select(".scores .score").text().toInt(),
        awayscore = scoreboxes[1].select(".scores .score").text().toInt(),
        scoringplays = scores
    )
}
// Scrapes the final standings of every season in `seasonRange` from PFR. We want the season
// record of the teams that faced large deficits.
//
// returns one TeamRecord per standings row, seasons in order
// throws the underlying Fuel error as soon as any request fails
fun getTeamRecords(seasonRange : IntRange) : List<TeamRecord> =
    seasonRange.flatMap { year ->
        println("season: ${year}")
        val (_, _, result) = Fuel.get("https://www.pro-football-reference.com/years/${year}/").responseString()
        val pfrPage : String = when (result) {
            // we don't want to try to continue if there's been an error
            is Result.Failure -> throw result.getException()
            is Result.Success -> result.get()
        }
        // rows whose class contains "thead" are skipped by the selector
        Jsoup.parse(pfrPage)
            .select(".content_grid tbody tr:not([class*=thead])")
            .map { row ->
                val teamLink = row.select("th a")
                val teamHref = teamLink.attr("href")
                println(teamLink.text())
                TeamRecord(
                    season = year,
                    teamname = teamLink.text(),
                    // the abbreviation is the second-to-last path segment of the link
                    abbr = teamHref.substringBeforeLast("/").substringAfterLast("/"),
                    url = teamHref,
                    wins = row.select("td[data-stat='wins']").text().toInt(),
                    losses = row.select("td[data-stat='losses']").text().toInt(),
                    // a blank ties cell counts as 0
                    ties = row.select("td[data-stat='ties']").text().let { if (it.isBlank()) 0 else it.toInt() },
                    pointsFor = row.select("td[data-stat='points']").text().toInt(),
                    pointsAgainst = row.select("td[data-stat='points_opp']").text().toInt(),
                    pfrOSRS = row.select("td[data-stat='srs_offense']").text().toFloat(),
                    pfrDSRS = row.select("td[data-stat='srs_defense']").text().toFloat()
                )
            }
    }
// Scrape once and cache the result as JSON; later runs just load the cache.
// NOTE: the cache location is a hard-coded path on drive e: - adjust it for your machine.
val dataFile : File = File("e:/pfrdata_async.json")

// Single source of truth for the seasons covered, so games and team records cannot drift apart.
val scrapedSeasons : IntRange = 2015..2019

if (!dataFile.exists()) {
    println("...scraping data from Pro-Football-Reference...")
    val pfrWeeks : List<PFRWeek> = getWeeks(seasonRange = scrapedSeasons, weekRange = 1..21)
    // getGames is the only suspending function, so it is the only call that needs runBlocking
    val pfrGames : List<PFRGame> = runBlocking { getGames(pfrWeeks) }
    val scrapedRecords : List<TeamRecord> = getTeamRecords(seasonRange = scrapedSeasons)
    dataFile.writeText(adapterPFRData.toJson(PFRData(games = pfrGames, records = scrapedRecords)))
} else {
    println("...loading previously-scraped data...")
}

// fromJson returns null only when the file holds a JSON `null`; fail with a readable message in that case
val (rawGames, teamRecords) = checkNotNull(adapterPFRData.fromJson(dataFile.readText())) {
    "${dataFile.path} does not contain scraped PFR data"
}
...loading previously-scraped data...
// number of games in the data set, playoff games included
rawGames.size
1335
// number of season records: 5 seasons * 32 teams = 160
teamRecords.size
160
// example: the list of scoring plays of one game
rawGames[2].scoringplays
[ScoringPlay(quarter=1, timeString=9:28, secondsElapsed=332, team=Bears, detail=Robbie Gould 28 yard field goal, awayscore=0, homescore=3), ScoringPlay(quarter=1, timeString=0:43, secondsElapsed=857, team=Packers, detail=James Jones 13 yard pass from Aaron Rodgers (Mason Crosby kick), awayscore=7, homescore=3), ScoringPlay(quarter=2, timeString=7:49, secondsElapsed=1331, team=Bears, detail=Matt Forte 1 yard rush (Robbie Gould kick), awayscore=7, homescore=10), ScoringPlay(quarter=2, timeString=2:32, secondsElapsed=1648, team=Packers, detail=Mason Crosby 37 yard field goal, awayscore=10, homescore=10), ScoringPlay(quarter=2, timeString=0:08, secondsElapsed=1792, team=Bears, detail=Robbie Gould 50 yard field goal, awayscore=10, homescore=13), ScoringPlay(quarter=3, timeString=11:56, secondsElapsed=1984, team=Packers, detail=James Jones 1 yard pass from Aaron Rodgers (Mason Crosby kick), awayscore=17, homescore=13), ScoringPlay(quarter=3, timeString=4:57, secondsElapsed=2403, team=Bears, detail=Robbie Gould 44 yard field goal, awayscore=17, homescore=16), ScoringPlay(quarter=4, timeString=10:26, secondsElapsed=2974, team=Packers, detail=Randall Cobb 5 yard pass from Aaron Rodgers (Mason Crosby kick), awayscore=24, homescore=16), ScoringPlay(quarter=4, timeString=1:55, secondsElapsed=3485, team=Packers, detail=Eddie Lacy 2 yard rush (Mason Crosby kick), awayscore=31, homescore=16), ScoringPlay(quarter=4, timeString=0:34, secondsElapsed=3566, team=Bears, detail=Martellus Bennett 24 yard pass from Jay Cutler (Robbie Gould kick), awayscore=31, homescore=23)]
// Games without any scoring play in the first half (none at or before the 1800-second mark).
// For these the first-half margin is NULL, which has to be handled downstream.
rawGames.filter { game -> game.scoringplays.none { play -> play.secondsElapsed <= 1800 } }
[PFRGame(season=2017, week=17, pfrURL=/boxscores/201712310phi.htm, hometeam=Philadelphia Eagles, awayteam=Dallas Cowboys, homescore=0, awayscore=6, scoringplays=[ScoringPlay(quarter=4, timeString=12:19, secondsElapsed=2861, team=Cowboys, detail=Brice Butler 20 yard pass from Dak Prescott (Dan Bailey kick failed), awayscore=6, homescore=0)]), PFRGame(season=2019, week=7, pfrURL=/boxscores/201910200was.htm, hometeam=Washington Redskins, awayteam=San Francisco 49ers, homescore=0, awayscore=9, scoringplays=[ScoringPlay(quarter=3, timeString=5:32, secondsElapsed=2368, team=49ers, detail=Robbie Gould 28 yard field goal, awayscore=3, homescore=0), ScoringPlay(quarter=4, timeString=9:06, secondsElapsed=3054, team=49ers, detail=Robbie Gould 22 yard field goal, awayscore=6, homescore=0), ScoringPlay(quarter=4, timeString=0:27, secondsElapsed=3573, team=49ers, detail=Robbie Gould 29 yard field goal, awayscore=9, homescore=0)])]
Our data is not entirely tabular, due to multiple scoring plays per game (thus multiple Margin instances per game). We also need to calculate winners/losers and match up home/away with team names. So this class is a bit messy but necessary.
/**
 * Derived view over one scraped [PFRGame]: who won, the point margin after every scoring play,
 * and the largest margins overall and in the first half (the first 1800 seconds).
 *
 * Sides are plain strings: "home", "away" or "tie".
 *
 * @property source the scraped game being analysed
 * @property teamRecords all season records, used to look up a team's final record
 */
class GameAnalysis(val source : PFRGame, val teamRecords : List<TeamRecord>) {
    /** True for week 18 and later, which this analysis treats as playoff weeks. */
    val isPlayoff : Boolean = source.week >= 18

    /**
     * The score difference right after one scoring play.
     *
     * @property points absolute point difference
     * @property timeElapsed seconds elapsed in the game when the play happened
     * @property leadingSide "home", "away" or "tie" (a side, not a team name)
     * @property lostLead (not implemented yet) did the leading team ever lose the lead, even if they eventually won
     * @property wonGame whether the side leading at this point is also the final [winner]
     * @property leadingTeam team name of [leadingSide]
     * @property trailingTeam team name of the side opposite [leadingSide]
     */
    inner class Margin(
        val points : Int,
        val timeElapsed : Int,
        val leadingSide : String,
        val lostLead : Boolean? = null,
        val wonGame : Boolean,
        val leadingTeam : String = teamName(leadingSide),
        val trailingTeam : String = teamName(opponent(leadingSide))
    )

    /** Side that is ahead for the given scores: "away", "home" or "tie". */
    fun leader(away : Int, home : Int) : String = when {
        away > home -> "away"
        away < home -> "home"
        else -> "tie" // the only remaining case is away == home
    }

    /** The other side; "tie" maps to itself and any unknown value to "uh-oh". */
    fun opponent(side : String) : String = when (side) {
        "away" -> "home"
        "home" -> "away"
        "tie" -> "tie"
        else -> "uh-oh"
    }

    /** Team name for a side; "tie" maps to itself and any unknown value to "uh-oh". */
    fun teamName(side : String) : String = when (side) {
        "away" -> source.awayteam
        "home" -> source.hometeam
        "tie" -> "tie"
        else -> "uh-oh"
    }

    /** Side with the higher final score ("tie" when the final scores are equal). */
    val winner : String = leader(source.awayscore, source.homescore)
    val winningTeam : String = teamName(winner)
    val losingTeam : String = teamName(opponent(winner))

    /**
     * Season record of [team] in [season].
     *
     * @throws NoSuchElementException if [teamRecords] has no matching entry
     */
    fun teamRecord(team : String, season : Int) : TeamRecord =
        teamRecords.firstOrNull { r -> r.season == season && r.teamname == team }
            ?: throw NoSuchElementException("no team record for '$team' in season $season")

    /** One margin per scoring play, in the order of [PFRGame.scoringplays]. */
    val margins : List<Margin> = source.scoringplays.map { p ->
        val side = leader(p.awayscore, p.homescore)
        Margin(
            points = kotlin.math.abs(p.awayscore - p.homescore),
            timeElapsed = p.secondsElapsed,
            leadingSide = side,
            wonGame = side == winner
        )
    }

    // margins of plays at or before the 1800-second mark (the first half)
    private val firstHalfMargins : List<Margin> = margins.filter { m -> m.timeElapsed <= 1800 }

    /** Largest point difference after any scoring play, 0 for a game without scoring plays. */
    val largestPointDiff : Int = margins.maxOfOrNull { m -> m.points } ?: 0

    /** Largest point difference after any first-half scoring play, 0 if there was none. */
    val largestFirstHalfPointDiff : Int = firstHalfMargins.maxOfOrNull { m -> m.points } ?: 0

    /**
     * The first margin with the largest point difference.
     *
     * Evaluated on first access, so a game without scoring plays can still be constructed;
     * reading this property for such a game throws [NoSuchElementException].
     */
    val largestMargin : Margin by lazy {
        margins.maxByOrNull { m -> m.points }
            ?: throw NoSuchElementException("game ${source.pfrURL} has no scoring plays")
    }

    /** The first first-half margin with the largest point difference, or null without first-half scoring. */
    val largestFirstHalfMargin : Margin? = firstHalfMargins.maxByOrNull { m -> m.points }

    /**
     * One-line summary of the largest first-half deficit and how the trailing team fared.
     *
     * Evaluated on first access because it looks up the trailing team's season record
     * (see [teamRecord]), which throws when that record is missing.
     */
    val display : String by lazy {
        largestFirstHalfMargin?.let { fhm ->
            // outcome from the trailing team's point of view
            val outcome = when (winner) {
                fhm.leadingSide -> "lost"
                "tie" -> "tied"
                else -> "won"
            }
            "s${source.season}-w${source.week.toString().padStart(2, '0')} " +
                "${fhm.trailingTeam} (${opponent(fhm.leadingSide)}) trailed by " +
                "${fhm.points} to ${fhm.leadingTeam} " +
                "and ${outcome} :: " +
                "final record: ${teamRecord(fhm.trailingTeam, source.season).wins} wins"
        } ?: "no first-half scoring"
    }
}
we also need to define the "qualification" criteria and filter our list of games
// wrap every scraped game in its analysis
val allgames = rawGames.map { game -> GameAnalysis(game, teamRecords) }

// The qualification rule lives outside the class, as an extension property:
// a non-playoff game in which some first-half margin (<= 1800 s) reached 21 points or more.
val GameAnalysis.qualifies : Boolean
    get() = !isPlayoff && margins.any { m -> m.timeElapsed <= 1800 && m.points >= 21 }

// mutable on purpose: individual games are removed from it further down
val qualifyingGames : MutableList<GameAnalysis> =
    allgames.filterTo(mutableListOf<GameAnalysis>()) { g -> g.qualifies }
qualifyingGames.size
120
// print the one-line summary of every qualifying non-playoff game
for (qg in qualifyingGames.filterNot { g -> g.isPlayoff }) {
    println(qg.display)
}
s2015-w01 Houston Texans (home) trailed by 21 to Kansas City Chiefs and lost :: final record: 9 wins s2015-w01 Tampa Bay Buccaneers (home) trailed by 28 to Tennessee Titans and lost :: final record: 6 wins s2015-w01 Oakland Raiders (home) trailed by 24 to Cincinnati Bengals and lost :: final record: 7 wins s2015-w02 San Francisco 49ers (away) trailed by 26 to Pittsburgh Steelers and lost :: final record: 5 wins s2015-w02 Tennessee Titans (away) trailed by 21 to Cleveland Browns and lost :: final record: 3 wins s2015-w03 New York Jets (home) trailed by 24 to Philadelphia Eagles and lost :: final record: 10 wins s2015-w03 San Francisco 49ers (away) trailed by 28 to Arizona Cardinals and lost :: final record: 5 wins s2015-w03 Miami Dolphins (home) trailed by 27 to Buffalo Bills and lost :: final record: 6 wins s2015-w04 Houston Texans (away) trailed by 28 to Atlanta Falcons and lost :: final record: 9 wins s2015-w05 Detroit Lions (home) trailed by 21 to Arizona Cardinals and lost :: final record: 7 wins s2015-w07 Buffalo Bills (away) trailed by 24 to Jacksonville Jaguars and lost :: final record: 8 wins s2015-w07 Houston Texans (away) trailed by 41 to Miami Dolphins and lost :: final record: 9 wins s2015-w07 Washington Redskins (home) trailed by 24 to Tampa Bay Buccaneers and won :: final record: 9 wins s2015-w07 San Diego Chargers (home) trailed by 27 to Oakland Raiders and lost :: final record: 4 wins s2015-w08 Detroit Lions (away) trailed by 21 to Kansas City Chiefs and lost :: final record: 7 wins s2015-w13 Minnesota Vikings (home) trailed by 21 to Seattle Seahawks and lost :: final record: 11 wins s2015-w14 Tennessee Titans (away) trailed by 27 to New York Jets and lost :: final record: 3 wins s2015-w14 Atlanta Falcons (away) trailed by 28 to Carolina Panthers and lost :: final record: 8 wins s2015-w15 Buffalo Bills (away) trailed by 21 to Washington Redskins and lost :: final record: 8 wins s2015-w15 Tennessee Titans (away) trailed by 21 to New England Patriots 
and lost :: final record: 3 wins s2015-w15 San Francisco 49ers (home) trailed by 21 to Cincinnati Bengals and lost :: final record: 5 wins s2015-w15 Miami Dolphins (away) trailed by 23 to San Diego Chargers and lost :: final record: 6 wins s2015-w16 Jacksonville Jaguars (away) trailed by 24 to New Orleans Saints and lost :: final record: 5 wins s2015-w17 Dallas Cowboys (home) trailed by 24 to Washington Redskins and lost :: final record: 4 wins s2015-w17 Arizona Cardinals (home) trailed by 24 to Seattle Seahawks and lost :: final record: 13 wins s2015-w17 Tampa Bay Buccaneers (away) trailed by 21 to Carolina Panthers and lost :: final record: 6 wins s2016-w02 Miami Dolphins (away) trailed by 24 to New England Patriots and lost :: final record: 10 wins s2016-w02 Tampa Bay Buccaneers (away) trailed by 24 to Arizona Cardinals and lost :: final record: 9 wins s2016-w02 Jacksonville Jaguars (away) trailed by 21 to San Diego Chargers and lost :: final record: 3 wins s2016-w03 Detroit Lions (away) trailed by 28 to Green Bay Packers and lost :: final record: 9 wins s2016-w03 Chicago Bears (away) trailed by 21 to Dallas Cowboys and lost :: final record: 3 wins s2016-w03 San Francisco 49ers (away) trailed by 21 to Seattle Seahawks and lost :: final record: 2 wins s2016-w04 Kansas City Chiefs (away) trailed by 29 to Pittsburgh Steelers and lost :: final record: 12 wins s2016-w05 Houston Texans (away) trailed by 24 to Minnesota Vikings and lost :: final record: 9 wins s2016-w05 Cincinnati Bengals (away) trailed by 21 to Dallas Cowboys and lost :: final record: 6 wins s2016-w06 Carolina Panthers (away) trailed by 21 to New Orleans Saints and lost :: final record: 6 wins s2016-w08 Arizona Cardinals (away) trailed by 24 to Carolina Panthers and lost :: final record: 7 wins s2016-w08 Jacksonville Jaguars (away) trailed by 27 to Tennessee Titans and lost :: final record: 3 wins s2016-w10 Green Bay Packers (away) trailed by 25 to Tennessee Titans and lost :: final record: 10 wins 
s2016-w11 Tennessee Titans (away) trailed by 21 to Indianapolis Colts and lost :: final record: 9 wins s2016-w13 Miami Dolphins (away) trailed by 24 to Baltimore Ravens and lost :: final record: 10 wins s2016-w13 New York Jets (home) trailed by 21 to Indianapolis Colts and lost :: final record: 5 wins s2016-w14 San Diego Chargers (away) trailed by 23 to Carolina Panthers and lost :: final record: 5 wins s2016-w14 Los Angeles Rams (home) trailed by 21 to Atlanta Falcons and lost :: final record: 4 wins s2016-w15 Minnesota Vikings (home) trailed by 27 to Indianapolis Colts and lost :: final record: 8 wins s2016-w15 San Francisco 49ers (away) trailed by 21 to Atlanta Falcons and lost :: final record: 2 wins s2016-w16 New York Jets (away) trailed by 27 to New England Patriots and lost :: final record: 5 wins s2016-w17 New Orleans Saints (away) trailed by 22 to Atlanta Falcons and lost :: final record: 7 wins s2017-w01 Indianapolis Colts (away) trailed by 24 to Los Angeles Rams and lost :: final record: 4 wins s2017-w02 Chicago Bears (away) trailed by 26 to Tampa Bay Buccaneers and lost :: final record: 5 wins s2017-w03 Baltimore Ravens (away) trailed by 23 to Jacksonville Jaguars and lost :: final record: 9 wins s2017-w03 Cleveland Browns (away) trailed by 21 to Indianapolis Colts and lost :: final record: 0 wins s2017-w04 Chicago Bears (away) trailed by 21 to Green Bay Packers and lost :: final record: 5 wins s2017-w04 Cleveland Browns (home) trailed by 21 to Cincinnati Bengals and lost :: final record: 0 wins s2017-w04 Tennessee Titans (away) trailed by 21 to Houston Texans and lost :: final record: 9 wins s2017-w05 Arizona Cardinals (away) trailed by 21 to Philadelphia Eagles and lost :: final record: 8 wins s2017-w06 Cleveland Browns (away) trailed by 21 to Houston Texans and lost :: final record: 0 wins s2017-w06 Tampa Bay Buccaneers (away) trailed by 24 to Arizona Cardinals and lost :: final record: 5 wins s2017-w06 Detroit Lions (away) trailed by 21 to New 
Orleans Saints and lost :: final record: 9 wins s2017-w07 Arizona Cardinals (away) trailed by 23 to Los Angeles Rams and lost :: final record: 8 wins s2017-w09 Denver Broncos (away) trailed by 22 to Philadelphia Eagles and lost :: final record: 5 wins s2017-w11 Buffalo Bills (away) trailed by 30 to Los Angeles Chargers and lost :: final record: 9 wins s2017-w12 Chicago Bears (away) trailed by 24 to Philadelphia Eagles and lost :: final record: 5 wins s2017-w15 Cincinnati Bengals (away) trailed by 24 to Minnesota Vikings and lost :: final record: 7 wins s2017-w15 Houston Texans (away) trailed by 31 to Jacksonville Jaguars and lost :: final record: 4 wins s2017-w15 Seattle Seahawks (home) trailed by 34 to Los Angeles Rams and lost :: final record: 9 wins s2018-w01 Buffalo Bills (away) trailed by 26 to Baltimore Ravens and lost :: final record: 6 wins s2018-w01 Arizona Cardinals (home) trailed by 21 to Washington Redskins and lost :: final record: 3 wins s2018-w02 Baltimore Ravens (away) trailed by 21 to Cincinnati Bengals and lost :: final record: 10 wins s2018-w02 Pittsburgh Steelers (home) trailed by 21 to Kansas City Chiefs and lost :: final record: 9 wins s2018-w02 Buffalo Bills (home) trailed by 25 to Los Angeles Chargers and lost :: final record: 6 wins s2018-w03 Minnesota Vikings (home) trailed by 27 to Buffalo Bills and lost :: final record: 8 wins s2018-w03 San Francisco 49ers (away) trailed by 28 to Kansas City Chiefs and lost :: final record: 4 wins s2018-w04 Miami Dolphins (away) trailed by 24 to New England Patriots and lost :: final record: 7 wins s2018-w04 Tampa Bay Buccaneers (away) trailed by 35 to Chicago Bears and lost :: final record: 5 wins s2018-w05 Indianapolis Colts (away) trailed by 21 to New England Patriots and lost :: final record: 10 wins s2018-w05 Green Bay Packers (away) trailed by 24 to Detroit Lions and lost :: final record: 6 wins s2018-w06 Jacksonville Jaguars (away) trailed by 24 to Dallas Cowboys and lost :: final record: 5 wins 
s2018-w07 Buffalo Bills (away) trailed by 24 to Indianapolis Colts and lost :: final record: 6 wins s2018-w07 Arizona Cardinals (home) trailed by 32 to Denver Broncos and lost :: final record: 3 wins s2018-w07 San Francisco 49ers (home) trailed by 22 to Los Angeles Rams and lost :: final record: 4 wins s2018-w08 Tampa Bay Buccaneers (away) trailed by 21 to Cincinnati Bengals and lost :: final record: 5 wins s2018-w09 Tampa Bay Buccaneers (away) trailed by 28 to Carolina Panthers and lost :: final record: 5 wins s2018-w09 Buffalo Bills (home) trailed by 28 to Chicago Bears and lost :: final record: 6 wins s2018-w09 Los Angeles Rams (away) trailed by 21 to New Orleans Saints and lost :: final record: 13 wins s2018-w10 Cincinnati Bengals (home) trailed by 28 to New Orleans Saints and lost :: final record: 6 wins s2018-w10 Detroit Lions (away) trailed by 26 to Chicago Bears and lost :: final record: 6 wins s2018-w10 New York Jets (home) trailed by 31 to Buffalo Bills and lost :: final record: 4 wins s2018-w11 Tennessee Titans (away) trailed by 24 to Indianapolis Colts and lost :: final record: 9 wins s2018-w12 Cincinnati Bengals (home) trailed by 28 to Cleveland Browns and lost :: final record: 6 wins s2018-w13 Cleveland Browns (away) trailed by 23 to Houston Texans and lost :: final record: 7 wins s2018-w14 Washington Redskins (home) trailed by 34 to New York Giants and lost :: final record: 7 wins s2018-w15 Miami Dolphins (away) trailed by 21 to Minnesota Vikings and lost :: final record: 7 wins s2018-w17 Green Bay Packers (home) trailed by 21 to Detroit Lions and lost :: final record: 6 wins s2018-w17 San Francisco 49ers (away) trailed by 25 to Los Angeles Rams and lost :: final record: 4 wins s2018-w17 New Orleans Saints (home) trailed by 23 to Carolina Panthers and lost :: final record: 13 wins s2018-w17 Oakland Raiders (away) trailed by 21 to Kansas City Chiefs and lost :: final record: 4 wins s2019-w01 Atlanta Falcons (away) trailed by 21 to Minnesota Vikings 
and lost :: final record: 7 wins s2019-w01 Miami Dolphins (home) trailed by 39 to Baltimore Ravens and lost :: final record: 5 wins s2019-w02 Minnesota Vikings (away) trailed by 21 to Green Bay Packers and lost :: final record: 10 wins s2019-w03 Oakland Raiders (away) trailed by 21 to Minnesota Vikings and lost :: final record: 7 wins s2019-w03 Washington Redskins (home) trailed by 28 to Chicago Bears and lost :: final record: 3 wins s2019-w04 Los Angeles Rams (home) trailed by 21 to Tampa Bay Buccaneers and lost :: final record: 9 wins s2019-w05 New York Jets (away) trailed by 21 to Philadelphia Eagles and lost :: final record: 7 wins s2019-w06 Philadelphia Eagles (away) trailed by 21 to Minnesota Vikings and lost :: final record: 9 wins s2019-w06 Los Angeles Chargers (home) trailed by 21 to Pittsburgh Steelers and lost :: final record: 5 wins s2019-w07 New York Jets (home) trailed by 24 to New England Patriots and lost :: final record: 7 wins s2019-w08 Atlanta Falcons (home) trailed by 24 to Seattle Seahawks and lost :: final record: 7 wins s2019-w08 Carolina Panthers (away) trailed by 24 to San Francisco 49ers and lost :: final record: 5 wins s2019-w10 Cincinnati Bengals (home) trailed by 25 to Baltimore Ravens and lost :: final record: 2 wins s2019-w12 Miami Dolphins (away) trailed by 28 to Cleveland Browns and lost :: final record: 5 wins s2019-w12 Green Bay Packers (away) trailed by 23 to San Francisco 49ers and lost :: final record: 13 wins s2019-w12 Los Angeles Rams (home) trailed by 22 to Baltimore Ravens and lost :: final record: 9 wins s2019-w13 Jacksonville Jaguars (home) trailed by 25 to Tampa Bay Buccaneers and lost :: final record: 6 wins s2019-w13 Oakland Raiders (away) trailed by 21 to Kansas City Chiefs and lost :: final record: 7 wins s2019-w14 Houston Texans (home) trailed by 28 to Denver Broncos and lost :: final record: 10 wins s2019-w14 Jacksonville Jaguars (home) trailed by 21 to Los Angeles Chargers and lost :: final record: 6 wins 
s2019-w15 Detroit Lions (home) trailed by 21 to Tampa Bay Buccaneers and lost :: final record: 3 wins s2019-w15 Los Angeles Rams (away) trailed by 21 to Dallas Cowboys and lost :: final record: 9 wins s2019-w17 Carolina Panthers (home) trailed by 35 to New Orleans Saints and lost :: final record: 5 wins
We'd use data frames in Python (pandas) or R (dplyr). Kotlin's krangl is less mature, and earlier we stated our data isn't exactly tabular. Because all our data is in defined classes, we can use those for plots instead.
// key = final win total (0-16), value = number of team-seasons that ended on that total
// 5 seasons * 32 teams = 160 season win totals
val seasonWinTotals : Map<Int, Int> = (0..16).associateWith { wins ->
    teamRecords.count { tr -> tr.wins == wins }
}
seasonWinTotals
{0=1, 1=1, 2=2, 3=8, 4=9, 5=16, 6=16, 7=23, 8=12, 9=20, 10=17, 11=11, 12=10, 13=11, 14=2, 15=1, 16=0}
// simple list of number of total wins by the trailing team in a qualifying game
// for every qualifying game: the season win total of the team that trailed
val trailerWinTotals = qualifyingGames.map { qg ->
    // `!!` is safe: a game only qualifies when it has a first-half margin, so the largest one exists
    val trailer = qg.largestFirstHalfMargin!!.trailingTeam
    qg.teamRecord(trailer, qg.source.season).wins
}
trailerWinTotals
[9, 6, 7, 5, 3, 10, 5, 6, 9, 7, 8, 9, 9, 4, 7, 11, 3, 8, 8, 3, 5, 6, 5, 4, 13, 6, 10, 9, 3, 9, 3, 2, 12, 9, 6, 6, 7, 3, 10, 9, 10, 5, 5, 4, 8, 2, 5, 7, 4, 5, 9, 0, 5, 0, 9, 8, 0, 5, 9, 8, 5, 9, 5, 7, 4, 9, 6, 3, 10, 9, 6, 8, 4, 7, 5, 10, 6, 5, 6, 3, 4, 5, 5, 6, 13, 6, 6, 4, 9, 6, 7, 7, 7, 6, 4, 13, 4, 7, 5, 10, 7, 3, 9, 7, 9, 5, 7, 7, 5, 2, 5, 13, 9, 6, 7, 10, 6, 3, 9, 5]
// mean season win total of the trailing teams over all qualifying games
trailerWinTotals.average()
6.475
// Bar chart: for each season win total, the number of qualifying games whose trailing team finished on it.
// The x axis is fixed to 0-16 so that win totals without any game still get a slot.
val p = lets_plot() { x = trailerWinTotals } + ggsize(640, 240)
p + geom_bar(stat=Stat.count()) +
xlab("season total wins") + ylab("qualifying games") +
xlim(IntRange(0,16)) + ggtitle("distribution of total season wins by large-deficit teams")
// note: Stat.count() is the default stat for bar charts (geom_bar), so it could be left out
is that really correct? maybe they were locked into a playoff spot and resting personnel?
// box-score links of the qualifying games whose trailing team finished the season with 13 wins
qualifyingGames
    .filter { qg ->
        val trailer = qg.largestFirstHalfMargin!!.trailingTeam
        qg.teamRecord(trailer, qg.source.season).wins == 13
    }
    .map { qg -> qg.source.pfrURL }
[/boxscores/201601030crd.htm, /boxscores/201811040nor.htm, /boxscores/201812300nor.htm, /boxscores/201911240sfo.htm]
So only 1 definite "rest the starters" game; the other 3 were matchups between playoff teams. The Rams even managed to tie the score in their game (before eventually losing). It's fair to exclude the NO-Carolina game but we should keep the others.
// drop the one game we decided to exclude, then re-check the count
qualifyingGames.removeAll { qg -> qg.source.pfrURL == "/boxscores/201812300nor.htm" }
qualifyingGames.size
119
// recompute the trailing teams' season win totals from the current list of qualifying games
val trailerWinTotals = qualifyingGames.map { qg ->
    // `!!` is safe: a game only qualifies when it has a first-half margin, so the largest one exists
    val trailer = qg.largestFirstHalfMargin!!.trailingTeam
    qg.teamRecord(trailer, qg.source.season).wins
}
print(trailerWinTotals.average())

// same bar chart as before; geom_bar() counts the occurrences of each x value by default
val p = lets_plot() { x = trailerWinTotals } + ggsize(640, 240)
p + geom_bar() +
    xlab("season total wins") + ylab("qualifying games") +
    xlim(IntRange(0,16)) + ggtitle("distribution of total season wins by large-deficit teams")
6.420168067226891
// Bar chart of the baseline: how many team-seasons ended on each win total.
// The counts are already computed in seasonWinTotals, so the bars use Stat.identity (y is taken as-is).
val p = lets_plot() { x = seasonWinTotals.keys } + ggsize(640, 240)
p + geom_bar(stat=Stat.identity) { y=seasonWinTotals.values } +
xlab("season total wins") + ylab("seasons") +
xlim(IntRange(0,16)) + ggtitle("distribution of total season wins by all teams, 2015-2019")
We plotted how often teams finish with 0 through 16 wins. We also plotted how often our "large-deficit" teams finish with 0 through 16 wins. On casual observation, the large-deficit graph doesn't look TOO different, maybe just moved 1.5 games to the left (mean of 6.5 rather than 8).
// key = season win total (0-16), value = number of qualifying games whose trailing team finished on it
val trailerWinCounts : Map<Int, Int> = (0..16).associateWith { wins ->
    trailerWinTotals.count { twt -> twt == wins }
}

// Qualifying games divided by (16.0 * team-seasons) for each win total.
// 16.0 is presumably the number of regular-season games per team - TODO confirm.
// A win total that no team finished on has 0 team-seasons, so its entry is 0 / 0.0 = NaN.
val trailerProbabilities : Map<Int, Double> = (0..16).associateWith { wins ->
    // we won't have nulls because both maps have the same keys 0-16
    val qualifyingCount = trailerWinCounts[wins]!!
    val teamSeasons = seasonWinTotals[wins]!!
    qualifyingCount / (16.0 * teamSeasons)
}

// the values are already computed, so the bars use Stat.identity
val p = lets_plot() { x = trailerProbabilities.keys } + ggsize(640, 240)
p + geom_bar(stat=Stat.identity) { y = trailerProbabilities.values } +
    xlab("season total wins") + ylab("P(qualifying)") +
    xlim(IntRange(0,16)) + ggtitle("Probability of being a large-deficit team")
trailerProbabilities
{0=0.1875, 1=0.0, 2=0.09375, 3=0.078125, 4=0.06944444444444445, 5=0.08203125, 6=0.06640625, 7=0.043478260869565216, 8=0.036458333333333336, 9=0.059375, 10=0.029411764705882353, 11=0.005681818181818182, 12=0.00625, 13=0.017045454545454544, 14=0.0, 15=0.0, 16=NaN}