This is an experiment in using the Kotlin kernel for Jupyter

In [13]:

// two "supported" packages, we can skip the full dependency & import boilerplate
%use lets-plot, krangl

In [14]:

// Load the season-by-season NFL scoring table into a krangl DataFrame.
// csv is courtesy of pro-football-reference: https://www.pro-football-reference.com/years/NFL/scoring.htm
val dfScoring = DataFrame.readCSV("nfl_scoring.csv")
// Bare trailing expression: its value becomes the cell's output (the table preview).
dfScoring

Out[14]:

Rk	Year	Tms	RshTD	RecTD	PR TD	KR TD	FblTD	IntTD	OthTD	AllTD	2PM	2PA	XPM	XPA	FGM	FGA	Sfty	Pts	Pts/G
1	2020	32	132	218		1	2	8		361	21	40	301	321	210	248	6	3151	25.8
2	2019	32	447	797	7	7	34	35	5	1332	54	113	1136	1210	802	983	17	11676	22.8
3	2018	32	439	847	7	5	24	45	4	1371	66	129	1164	1235	802	947	10	11948	23.3
4	2017	32	380	741	10	7	41	42	4	1225	37	82	1066	1134	866	1027	15	11118	21.7
5	2016	32	443	786	10	7	22	34	4	1306	51	105	1119	1195	850	1009	20	11647	22.8
6	2015	32	365	842	13	7	33	53	5	1318	45	94	1146	1217	834	987	16	11678	22.8
7	2014	32	380	807	13	6	28	47	12	1293	28	58	1222	1230	829	987	21	11565	22.6
8	2013	32	410	804	13	7	30	65	9	1338	34	69	1262	1267	863	998	20	11987	23.4
9	2012	32	401	757	18	13	26	71	11	1297	29	56	1229	1235	852	1016	13	11651	22.8
10	2011	32	400	745	20	9	31	49	5	1259	24	50	1200	1207	838	1011	21	11358	22.2
11	2010	32	399	751	13	23	22	57	5	1270	26	50	1203	1214	794	964	13	11283	22.0
12	2009	32	429	710	10	18	25	48	7	1247	24	59	1165	1185	756	930	14	10991	21.5
13	2008	32	476	646	16	13	33	52	10	1246	28	64	1170	1176	845	1000	21	11279	22.0
14	2007	32	386	720	17	25	37	52	6	1243	30	57	1165	1177	795	960	18	11104	21.7
15	2006	32	424	648	15	9	33	49	3	1181	21	35	1124	1135	767	942	12	10577	20.7
16	2005	32	431	644	9	12	23	47	6	1172	27	47	1099	1114	783	967	11	10556	20.6
17	2004	32	416	732	11	17	34	53	5	1268	37	73	1179	1189	703	870	15	11000	21.5
18	2003	32	427	654	18	13	24	58	4	1198	29	60	1110	1128	756	954	21	10666	20.8
19	2002	32	460	694	22	17	26	46	5	1270	47	81	1148	1165	737	951	12	11097	21.7
20	2001	31	365	635	12	10	33	59	6	1120	40	85	1008	1027	732	959	10	10024	20.2

... only showing top 20 rows

In [3]:

// Keep only the seasons strictly after 1989 and strictly before 2020 (i.e. 1990-2019),
// then convert the frame to a column-name -> values map, the form handed to lets_plot.
val mapScoring = dfScoring
    .filter { (it["Year"] gt 1989) AND (it["Year"] lt 2020) }
    .toMap()

// Echo the map as the cell result.
mapScoring

Out[3]:

{Rk=[Ljava.lang.Integer;@49f2b6ca, Year=[Ljava.lang.Integer;@2fd250ed, Tms=[Ljava.lang.Integer;@16d1f77d, RshTD=[Ljava.lang.Integer;@3c945a20, RecTD=[Ljava.lang.Integer;@608cff9e, PR TD=[Ljava.lang.String;@5de49e5a, KR TD=[Ljava.lang.String;@4497e084, FblTD=[Ljava.lang.String;@5a4cf76c, IntTD=[Ljava.lang.Integer;@5d3b93b4, OthTD=[Ljava.lang.String;@2454d007, AllTD=[Ljava.lang.Integer;@5f3c97c9, 2PM=[Ljava.lang.String;@6276fcd5, 2PA=[Ljava.lang.Integer;@63d1751c, XPM=[Ljava.lang.Integer;@61075589, XPA=[Ljava.lang.String;@5a3e14bf, FGM=[Ljava.lang.Integer;@ffbdb79, FGA=[Ljava.lang.String;@67ea360f, Sfty=[Ljava.lang.String;@3c9a8c66, Pts=[Ljava.lang.Integer;@16abeca6, Pts/G=[Ljava.lang.Double;@475add19}

In [4]:

// Base plot: season on the x axis, total points on the y axis, rendered at 640x240.
val p = lets_plot(mapScoring) {
    x = "Year"
    y = "Pts"
} + ggsize(640, 240)

// Add the bar layer and the title; the composed plot is the cell's value.
p + geom_bar(stat = Stat.identity) + ggtitle("Total Points per NFL regular season")

Out[4]:

In [5]:

// Base plot: season on the x axis, receiving touchdowns on the y axis, rendered at 640x240.
val p = lets_plot(mapScoring) {
    x = "Year"
    y = "RecTD"
} + ggsize(640, 240)

// Add the bar layer and the title; the composed plot is the cell's value.
p + geom_bar(stat = Stat.identity) + ggtitle("Total Receiving Touchdowns per NFL regular season")

Out[5]:

In [6]:

// Width, in years, of each bin the seasons are grouped into.
val yearsPerBin = 5

// Display label for the bin whose first season is `start`, e.g. 1990 -> "1990 - 1994".
// Used for both the "Years" column and the x-axis limits so the two always agree.
fun yearsLabel(start: Int): String = "$start - ${start + yearsPerBin - 1}"

// Seasons 1990-2019, each tagged with the first year of its bin ("YearRange")
// and with that bin's display label ("Years").
val dfScoringRanges = dfScoring
    .filter { (it["Year"] lt 2020) AND (it["Year"] gt 1989) }
    // "Year" is an Integer-backed column (see the toMap() output earlier in the notebook),
    // so it is mapped as Int and binned with integer division; for these positive years
    // that is the same value as flooring year / 5.0 and multiplying back by 5.
    .addColumn("YearRange") { ctx ->
        ctx["Year"].map<Int> { year -> (year / yearsPerBin) * yearsPerBin }
    }
    .addColumn("Years") { ctx ->
        ctx["YearRange"].map<Int> { start -> yearsLabel(start) }
    }

// Per-bin averages of total points and receiving touchdowns, as a column map for plotting.
val mapScoringRanges = dfScoringRanges
    .select({ listOf("Year", "Pts", "RecTD", "YearRange", "Years") })
    .groupBy("YearRange", "Years")
    .summarize(
        "mean_Pts" to { it["Pts"].mean(removeNA = true) },
        "mean_RecTD" to { it["RecTD"].mean(removeNA = true) }
    ).toMap()

// Bin labels in chronological order ("1990 - 1994" ... "2015 - 2019"); passed to
// scale_x_discrete(limits = ...) by the plotting cells to fix the x-axis order.
val xlimits = (1990..2015 step yearsPerBin).map { start -> yearsLabel(start) }

In [9]:

// Base plot over the binned summary: bin label on the x axis, mean points on the y axis.
val p = lets_plot(mapScoringRanges) {
    x = "Years"
    y = "mean_Pts"
} + ggsize(720, 240)

// Bars in the order given by xlimits, plus a title; the composed plot is the cell's value.
p + geom_bar(stat = Stat.identity) +
    scale_x_discrete(limits = xlimits) +
    ggtitle("Average total points per NFL regular season")

Out[9]:

In [10]:

// Rebuild the mean-points chart and write it to avg_points_binned.png.
// ggsave's return value (the saved file's path) is the cell's output.
ggsave(
    p + geom_bar(stat = Stat.identity) +
        scale_x_discrete(limits = xlimits) +
        ggtitle("Average total points per NFL regular season"),
    "avg_points_binned.png"
)

Out[10]:

E:\CFNine\winprobability\scraping\lets-plot-images\avg_points_binned.png

In [11]:

// Base plot over the binned summary: bin label on the x axis, mean receiving TDs on the y axis.
val p2 = lets_plot(mapScoringRanges) {
    x = "Years"
    y = "mean_RecTD"
} + ggsize(720, 240)

// Bars in the order given by xlimits, plus a title; the composed plot is the cell's value.
p2 + geom_bar(stat = Stat.identity) +
    scale_x_discrete(limits = xlimits) +
    ggtitle("Average Receiving Touchdowns per NFL regular season")

Out[11]:

In [12]:

// Rebuild the mean-receiving-TD chart and write it to avg_rectd_binned.png.
// ggsave's return value (the saved file's path) is the cell's output.
ggsave(
    p2 + geom_bar(stat = Stat.identity) +
        scale_x_discrete(limits = xlimits) +
        ggtitle("Average Receiving Touchdowns per NFL regular season"),
    "avg_rectd_binned.png"
)

Out[12]:

E:\CFNine\winprobability\scraping\lets-plot-images\avg_rectd_binned.png

In [ ]: