// lets-plot and krangl are both "supported" libraries, so %use lets us skip the full dependency & import boilerplate
%use lets-plot, krangl
// League-wide scoring totals, one row per season. The CSV is courtesy of
// pro-football-reference: https://www.pro-football-reference.com/years/NFL/scoring.htm
val scoringCsv = "nfl_scoring.csv"
val dfScoring = DataFrame.readCSV(scoringCsv)
// Bare trailing expression so the notebook renders the loaded frame.
dfScoring
Rk | Year | Tms | RshTD | RecTD | PR TD | KR TD | FblTD | IntTD | OthTD | AllTD | 2PM | 2PA | XPM | XPA | FGM | FGA | Sfty | Pts | Pts/G |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 2020 | 32 | 132 | 218 | 1 | 2 | 8 | 361 | 21 | 40 | 301 | 321 | 210 | 248 | 6 | 3151 | 25.8 | ||
2 | 2019 | 32 | 447 | 797 | 7 | 7 | 34 | 35 | 5 | 1332 | 54 | 113 | 1136 | 1210 | 802 | 983 | 17 | 11676 | 22.8 |
3 | 2018 | 32 | 439 | 847 | 7 | 5 | 24 | 45 | 4 | 1371 | 66 | 129 | 1164 | 1235 | 802 | 947 | 10 | 11948 | 23.3 |
4 | 2017 | 32 | 380 | 741 | 10 | 7 | 41 | 42 | 4 | 1225 | 37 | 82 | 1066 | 1134 | 866 | 1027 | 15 | 11118 | 21.7 |
5 | 2016 | 32 | 443 | 786 | 10 | 7 | 22 | 34 | 4 | 1306 | 51 | 105 | 1119 | 1195 | 850 | 1009 | 20 | 11647 | 22.8 |
6 | 2015 | 32 | 365 | 842 | 13 | 7 | 33 | 53 | 5 | 1318 | 45 | 94 | 1146 | 1217 | 834 | 987 | 16 | 11678 | 22.8 |
7 | 2014 | 32 | 380 | 807 | 13 | 6 | 28 | 47 | 12 | 1293 | 28 | 58 | 1222 | 1230 | 829 | 987 | 21 | 11565 | 22.6 |
8 | 2013 | 32 | 410 | 804 | 13 | 7 | 30 | 65 | 9 | 1338 | 34 | 69 | 1262 | 1267 | 863 | 998 | 20 | 11987 | 23.4 |
9 | 2012 | 32 | 401 | 757 | 18 | 13 | 26 | 71 | 11 | 1297 | 29 | 56 | 1229 | 1235 | 852 | 1016 | 13 | 11651 | 22.8 |
10 | 2011 | 32 | 400 | 745 | 20 | 9 | 31 | 49 | 5 | 1259 | 24 | 50 | 1200 | 1207 | 838 | 1011 | 21 | 11358 | 22.2 |
11 | 2010 | 32 | 399 | 751 | 13 | 23 | 22 | 57 | 5 | 1270 | 26 | 50 | 1203 | 1214 | 794 | 964 | 13 | 11283 | 22.0 |
12 | 2009 | 32 | 429 | 710 | 10 | 18 | 25 | 48 | 7 | 1247 | 24 | 59 | 1165 | 1185 | 756 | 930 | 14 | 10991 | 21.5 |
13 | 2008 | 32 | 476 | 646 | 16 | 13 | 33 | 52 | 10 | 1246 | 28 | 64 | 1170 | 1176 | 845 | 1000 | 21 | 11279 | 22.0 |
14 | 2007 | 32 | 386 | 720 | 17 | 25 | 37 | 52 | 6 | 1243 | 30 | 57 | 1165 | 1177 | 795 | 960 | 18 | 11104 | 21.7 |
15 | 2006 | 32 | 424 | 648 | 15 | 9 | 33 | 49 | 3 | 1181 | 21 | 35 | 1124 | 1135 | 767 | 942 | 12 | 10577 | 20.7 |
16 | 2005 | 32 | 431 | 644 | 9 | 12 | 23 | 47 | 6 | 1172 | 27 | 47 | 1099 | 1114 | 783 | 967 | 11 | 10556 | 20.6 |
17 | 2004 | 32 | 416 | 732 | 11 | 17 | 34 | 53 | 5 | 1268 | 37 | 73 | 1179 | 1189 | 703 | 870 | 15 | 11000 | 21.5 |
18 | 2003 | 32 | 427 | 654 | 18 | 13 | 24 | 58 | 4 | 1198 | 29 | 60 | 1110 | 1128 | 756 | 954 | 21 | 10666 | 20.8 |
19 | 2002 | 32 | 460 | 694 | 22 | 17 | 26 | 46 | 5 | 1270 | 47 | 81 | 1148 | 1165 | 737 | 951 | 12 | 11097 | 21.7 |
20 | 2001 | 31 | 365 | 635 | 12 | 10 | 33 | 59 | 6 | 1120 | 40 | 85 | 1008 | 1027 | 732 | 959 | 10 | 10024 | 20.2 |
... only showing top 20 rows
// Keep the seasons with 1989 < Year < 2020 (i.e. 1990 through 2019), then turn the
// frame into a column-name -> values map, which is what lets_plot is given below.
val mapScoring = dfScoring
    .filter { cols -> (cols["Year"] gt 1989) AND (cols["Year"] lt 2020) }
    .toMap()
// Bare trailing expression so the notebook echoes the map.
mapScoring
{Rk=[Ljava.lang.Integer;@49f2b6ca, Year=[Ljava.lang.Integer;@2fd250ed, Tms=[Ljava.lang.Integer;@16d1f77d, RshTD=[Ljava.lang.Integer;@3c945a20, RecTD=[Ljava.lang.Integer;@608cff9e, PR TD=[Ljava.lang.String;@5de49e5a, KR TD=[Ljava.lang.String;@4497e084, FblTD=[Ljava.lang.String;@5a4cf76c, IntTD=[Ljava.lang.Integer;@5d3b93b4, OthTD=[Ljava.lang.String;@2454d007, AllTD=[Ljava.lang.Integer;@5f3c97c9, 2PM=[Ljava.lang.String;@6276fcd5, 2PA=[Ljava.lang.Integer;@63d1751c, XPM=[Ljava.lang.Integer;@61075589, XPA=[Ljava.lang.String;@5a3e14bf, FGM=[Ljava.lang.Integer;@ffbdb79, FGA=[Ljava.lang.String;@67ea360f, Sfty=[Ljava.lang.String;@3c9a8c66, Pts=[Ljava.lang.Integer;@16abeca6, Pts/G=[Ljava.lang.Double;@475add19}
// Bar chart of total points per season. Stat.identity makes each bar's height the
// "Pts" value itself instead of a row count.
val p = lets_plot(mapScoring) {
    x = "Year"
    y = "Pts"
} + ggsize(640, 240)
p + geom_bar(stat = Stat.identity) + ggtitle("Total Points per NFL regular season")
// Bar chart of receiving touchdowns per season. Stat.identity makes each bar's
// height the "RecTD" value itself instead of a row count.
val p = lets_plot(mapScoring) {
    x = "Year"
    y = "RecTD"
} + ggsize(640, 240)
p + geom_bar(stat = Stat.identity) + ggtitle("Total Receiving Touchdowns per NFL regular season")
// Restrict to the seasons with 1989 < Year < 2020 and tag each one with its five-year bin:
//   YearRange - first year of the bin (Year rounded down to a multiple of 5)
//   Years     - display label of the form "<start> - <start + 4>", e.g. "1990 - 1994"
val dfScoringRanges = dfScoring
    .filter { cols -> (cols["Year"] gt 1989) AND (cols["Year"] lt 2020) }
    .addColumn("YearRange") { cols ->
        cols["Year"].map<Double> { year -> (floor(year / 5.0) * 5).toInt() }
    }
    .addColumn("Years") { cols ->
        cols["YearRange"].map<Int> { binStart -> "$binStart - ${binStart + 4}" }
    }
// Average "Pts" and "RecTD" within each five-year bin, converted to a
// column-name -> values map for plotting. The frame is grouped on both the numeric
// bin start and its "Years" label; the charts further down use "Years" as the x axis.
val mapScoringRanges = dfScoringRanges
    .select({ listOf("Year", "Pts", "RecTD", "YearRange", "Years") })
    .groupBy("YearRange", "Years")
    .summarize(
        "mean_Pts" to { bin -> bin["Pts"].mean(removeNA = true) },
        "mean_RecTD" to { bin -> bin["RecTD"].mean(removeNA = true) }
    )
    .toMap()
// X-axis categories in chronological order, passed to scale_x_discrete(limits = ...).
// They are generated with the same "<start> - <start + 4>" rule that builds the
// "Years" column, rather than typed out by hand, so the two cannot drift apart through
// a typo. Result: "1990 - 1994", "1995 - 1999", ..., "2015 - 2019".
val xlimits = (1990..2015 step 5).map { binStart -> "$binStart - ${binStart + 4}" }

// Bar chart of the per-bin average of season point totals. Stat.identity makes each
// bar's height the "mean_Pts" value itself instead of a row count.
val p = lets_plot(mapScoringRanges) {
    x = "Years"
    y = "mean_Pts"
} + ggsize(720, 240)
p + geom_bar(stat = Stat.identity) + scale_x_discrete(limits = xlimits) +
    ggtitle("Average total points per NFL regular season")
// Rebuild the binned average-points chart and write it to a PNG; the value returned
// by ggsave is the cell's result.
val avgPointsChart = p + geom_bar(stat = Stat.identity) + scale_x_discrete(limits = xlimits) +
    ggtitle("Average total points per NFL regular season")
ggsave(avgPointsChart, "avg_points_binned.png")
E:\CFNine\winprobability\scraping\lets-plot-images\avg_points_binned.png
// Bar chart of the per-bin average of season receiving-touchdown totals, with the
// x axis ordered by xlimits. Stat.identity makes each bar's height the "mean_RecTD"
// value itself instead of a row count.
val p2 = lets_plot(mapScoringRanges) {
    x = "Years"
    y = "mean_RecTD"
} + ggsize(720, 240)
p2 + geom_bar(stat = Stat.identity) + scale_x_discrete(limits = xlimits) +
    ggtitle("Average Receiving Touchdowns per NFL regular season")
// Rebuild the binned receiving-touchdown chart and write it to a PNG; the value
// returned by ggsave is the cell's result.
val avgRecTdChart = p2 + geom_bar(stat = Stat.identity) + scale_x_discrete(limits = xlimits) +
    ggtitle("Average Receiving Touchdowns per NFL regular season")
ggsave(avgRecTdChart, "avg_rectd_binned.png")
E:\CFNine\winprobability\scraping\lets-plot-images\avg_rectd_binned.png