Note: this notebook was updated in July 2021 to point to newer versions of its dependencies; the versions it originally used had been deprecated and were preventing the notebook from running to completion. The notebook was also used for a presentation of Kotlin's Jupyter kernel in March 2021, so data for the 2020 season, which did not exist when the original article was written, was added.
// Load Lets-Plot and krangl through the kernel's `%use` magic. Both are "supported"
// packages, so we can skip the full dependency & import boilerplate.
%use lets-plot, krangl
// The CSV is courtesy of pro-football-reference: https://www.pro-football-reference.com/years/NFL/passing.htm
// Read it into a krangl DataFrame; the bare `dfPassing` on the last line makes the
// notebook render the frame as the cell's output.
val dfPassing = DataFrame.readCSV("nfl_passing.csv")
dfPassing
Rk | Year | Tms | Cmp | Att | Cmp% | Yds | TD | TD% | Int | Int% | Y/A | AY/A | Y/C | Y/G | Rate | Sk | SkYds | NY/A | ANY/A | Sk% |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2020 | 32 | 11756 | 18018 | 65.2 | 122957 | 871 | 4.8 | 395 | 2.2 | 7.2 | 7.2 | 11.1 | 240.2 | 93.6 | 1135 | 7543 | 6.42 | 6.4 | 5.9 |
2 | 2019 | 32 | 11331 | 17853 | 63.5 | 120301 | 797 | 4.5 | 410 | 2.3 | 7.2 | 7.1 | 11.4 | 235.0 | 90.4 | 1276 | 8610 | 6.29 | 6.2 | 6.7 |
3 | 2018 | 32 | 11462 | 17671 | 64.9 | 121737 | 847 | 4.8 | 419 | 2.4 | 7.4 | 7.3 | 11.4 | 237.8 | 92.9 | 1281 | 8530 | 6.42 | 6.3 | 6.8 |
4 | 2017 | 32 | 10856 | 17488 | 62.1 | 114870 | 741 | 4.2 | 430 | 2.5 | 7.0 | 6.8 | 11.3 | 224.4 | 86.9 | 1195 | 7810 | 6.15 | 5.9 | 6.4 |
5 | 2016 | 32 | 11526 | 18295 | 63.0 | 123639 | 786 | 4.3 | 415 | 2.3 | 7.2 | 7.0 | 11.4 | 241.5 | 89.3 | 1118 | 7225 | 6.37 | 6.2 | 5.8 |
6 | 2015 | 32 | 11527 | 18298 | 63.0 | 124843 | 842 | 4.6 | 436 | 2.4 | 7.3 | 7.1 | 11.5 | 243.8 | 90.2 | 1187 | 7850 | 6.41 | 6.3 | 6.1 |
7 | 2014 | 32 | 11200 | 17879 | 62.6 | 121247 | 807 | 4.5 | 450 | 2.5 | 7.2 | 7.0 | 11.5 | 236.8 | 88.9 | 1212 | 7651 | 6.35 | 6.1 | 6.3 |
8 | 2013 | 32 | 11102 | 18136 | 61.2 | 120626 | 804 | 4.4 | 502 | 2.8 | 7.1 | 6.8 | 11.6 | 235.6 | 86.0 | 1295 | 8551 | 6.21 | 5.9 | 6.7 |
9 | 2012 | 32 | 10833 | 17788 | 60.9 | 118418 | 757 | 4.3 | 468 | 2.6 | 7.1 | 6.7 | 11.6 | 231.3 | 85.6 | 1169 | 7533 | 6.25 | 5.9 | 6.2 |
10 | 2011 | 32 | 10464 | 17410 | 60.1 | 117601 | 745 | 4.3 | 506 | 2.9 | 7.2 | 6.7 | 12.0 | 229.7 | 84.3 | 1188 | 7729 | 6.32 | 5.9 | 6.4 |
11 | 2010 | 32 | 10491 | 17269 | 60.8 | 113450 | 751 | 4.3 | 511 | 3.0 | 7.0 | 6.5 | 11.5 | 221.6 | 84.1 | 1130 | 7514 | 6.17 | 5.7 | 6.1 |
12 | 2009 | 32 | 10372 | 17033 | 60.9 | 111851 | 710 | 4.2 | 525 | 3.1 | 7.0 | 6.4 | 11.5 | 218.5 | 83.0 | 1101 | 7066 | 6.17 | 5.6 | 6.1 |
13 | 2008 | 32 | 10081 | 16526 | 61.0 | 108177 | 646 | 3.9 | 465 | 2.8 | 6.9 | 6.5 | 11.4 | 211.3 | 83.2 | 1036 | 6589 | 6.16 | 5.7 | 5.9 |
14 | 2007 | 32 | 10425 | 17045 | 61.2 | 109722 | 720 | 4.2 | 534 | 3.1 | 6.9 | 6.3 | 11.2 | 214.3 | 82.6 | 1102 | 7152 | 6.05 | 5.5 | 6.1 |
15 | 2006 | 32 | 9796 | 16389 | 59.8 | 104861 | 648 | 4.0 | 520 | 3.2 | 6.9 | 6.2 | 11.5 | 204.8 | 80.4 | 1164 | 7416 | 5.97 | 5.4 | 6.6 |
16 | 2005 | 32 | 9790 | 16464 | 59.5 | 104168 | 644 | 3.9 | 506 | 3.1 | 6.8 | 6.2 | 11.4 | 203.5 | 80.1 | 1182 | 7553 | 5.90 | 5.3 | 6.7 |
17 | 2004 | 32 | 9772 | 16354 | 59.8 | 107797 | 732 | 4.5 | 524 | 3.2 | 7.1 | 6.5 | 11.8 | 210.5 | 82.8 | 1196 | 7541 | 6.14 | 5.6 | 6.8 |
18 | 2003 | 32 | 9695 | 16493 | 58.8 | 102628 | 654 | 4.0 | 538 | 3.3 | 6.6 | 6.0 | 11.3 | 200.4 | 78.3 | 1092 | 6839 | 5.84 | 5.2 | 6.2 |
19 | 2002 | 32 | 10314 | 17292 | 59.6 | 108661 | 694 | 4.0 | 528 | 3.1 | 6.7 | 6.1 | 11.3 | 212.2 | 80.4 | 1175 | 7540 | 5.88 | 5.3 | 6.4 |
20 | 2001 | 31 | 9542 | 16181 | 59.0 | 102080 | 635 | 3.9 | 545 | 3.4 | 6.8 | 6.0 | 11.5 | 205.8 | 78.5 | 1196 | 7559 | 5.87 | 5.2 | 6.9 |
... only showing top 20 rows
// Keep only the rows whose "Year" is strictly between 1990 and 2021, then convert the
// frame to a column-name -> column-values map, which is what the plotting calls consume.
val mapPassing = dfPassing
    .filter { ctx -> (ctx["Year"] gt 1990) AND (ctx["Year"] lt 2021) }
    .toMap()
// Bare expression so the notebook echoes the map as the cell result.
mapPassing
{Rk=[Ljava.lang.Integer;@2873d672, Year=[Ljava.lang.Integer;@3bc735b3, Tms=[Ljava.lang.Integer;@577f9109, Cmp=[Ljava.lang.Integer;@4303b7f0, Att=[Ljava.lang.Integer;@757529a4, Cmp%=[Ljava.lang.Double;@779de014, Yds=[Ljava.lang.Integer;@5c41d037, TD=[Ljava.lang.Integer;@2234078, TD%=[Ljava.lang.Double;@5ec77191, Int=[Ljava.lang.Integer;@4642b71d, Int%=[Ljava.lang.Double;@1450078a, Y/A=[Ljava.lang.Double;@c68a5f8, AY/A=[Ljava.lang.Double;@69c6161d, Y/C=[Ljava.lang.Double;@3aefae67, Y/G=[Ljava.lang.Double;@2e1792e7, Rate=[Ljava.lang.Double;@6719a5b8, Sk=[Ljava.lang.String;@3eb631b8, SkYds=[Ljava.lang.String;@796d3c9f, NY/A=[Ljava.lang.String;@6bff19ff, ANY/A=[Ljava.lang.String;@41e1455d, Sk%=[Ljava.lang.String;@4e558728}
// Bar chart of adjusted yards per attempt ("AY/A") against "Year".
// Uses the camelCase Lets-Plot entry points (`letsPlot`, `geomBar`): the snake_case
// `lets_plot` / `geom_bar` spellings are deprecated in the Lets-Plot Kotlin API, and the
// range-based plot cells further down this notebook already use the camelCase names.
val p = letsPlot(mapPassing) { x = "Year"; y = "AY/A" } + ggsize(640, 240)
// Stat.identity: bar heights are taken directly from the y column instead of row counts.
p + geomBar(stat = Stat.identity) +
    ggtitle("Avg Adjusted Yds/Attempt per NFL regular season")
// Bar chart of passer rating ("Rate") against "Year".
// Uses the camelCase Lets-Plot entry points (`letsPlot`, `geomBar`): the snake_case
// `lets_plot` / `geom_bar` spellings are deprecated in the Lets-Plot Kotlin API, and the
// range-based plot cells further down this notebook already use the camelCase names.
val p = letsPlot(mapPassing) { x = "Year"; y = "Rate" } + ggsize(640, 240)
// Stat.identity: bar heights are taken directly from the y column instead of row counts.
p + geomBar(stat = Stat.identity) +
    ggtitle("Total Combined Passer Rating per NFL regular season")
// Tag each season (Year strictly between 1990 and 2021) with the five-year bucket it
// belongs to:
//   "YearRange" - first year of the bucket: floor((year - 1) / 5) * 5 + 1, e.g. 2020 -> 2016
//   "Years"     - display label built from that first year, e.g. "2016 - 2020"
val dfPassingRanges = dfPassing
    .filter { ctx -> (ctx["Year"] gt 1990) AND (ctx["Year"] lt 2021) }
    .addColumn("YearRange") { ctx ->
        ctx["Year"].map<Double> { year -> (floor((year - 1) / 5.0) * 5 + 1).toInt() }
    }
    .addColumn("Years") { ctx ->
        ctx["YearRange"].map<Int> { "$it - ${it + 4}" }
    }
// Collapse the per-season rows into one row per ("YearRange", "Years") group, holding the
// mean of "AY/A" and of "Rate" for that group (NA values are excluded from the means),
// then convert the summary to a column-name -> values map for plotting.
val mapPassingRanges = dfPassingRanges
    .select({ listOf("Year", "AY/A", "Rate", "YearRange", "Years") })
    .groupBy("YearRange", "Years")
    .summarize(
        "mean_AY/A" to { group -> group["AY/A"].mean(removeNA = true) },
        "mean_Rate" to { group -> group["Rate"].mean(removeNA = true) }
    )
    .toMap()
// Distinct, non-null "Years" labels, in the reverse of the order in which they appear in
// the summary map (null when the map has no "Years" entry).
val xlimits = mapPassingRanges["Years"]
    ?.filterNotNull()
    ?.distinct()
    ?.reversed()
// Bar chart of "mean_AY/A" for each "Years" label.
val p = letsPlot(mapPassingRanges) {
    x = "Years"
    y = "mean_AY/A"
} + ggsize(720, 240)
// x categories follow the order given by `xlimits`; the y scale is limited to 4.0..8.0.
p +
    geomBar(stat = Stat.identity) +
    scaleXDiscrete(limits = xlimits) +
    scaleYContinuous(limits = 4.0 to 8.0) +
    ggtitle("Average Adjusted Yards/Attempt per NFL regular season")
// Bar chart of "mean_Rate" for each "Years" label.
val p = letsPlot(mapPassingRanges) {
    x = "Years"
    y = "mean_Rate"
} + ggsize(720, 240)
// x categories follow the order given by `xlimits`.
p +
    geomBar(stat = Stat.identity) +
    scaleXDiscrete(limits = xlimits) +
    ggtitle("Total Combined Passer Rating per NFL regular season")