This notebook is an experiment in using the Kotlin kernel for Jupyter: it loads NFL season scoring data with krangl and charts it with lets-plot.

In [13]:
// two "supported" packages, we can skip the full dependency & import boilerplate
%use lets-plot, krangl
In [14]:
// csv is courtesy of pro-football-reference: https://www.pro-football-reference.com/years/NFL/scoring.htm
// Load the scoring CSV (relative path) into a krangl data frame.
val dfScoring = DataFrame.readCSV("nfl_scoring.csv")
// Bare expression on the last line so the notebook shows the frame as the cell result.
dfScoring
Out[14]:
RkYearTmsRshTDRecTDPR TDKR TDFblTDIntTDOthTDAllTD2PM2PAXPMXPAFGMFGASftyPtsPts/G
120203213221812836121403013212102486315125.8
2201932447797773435513325411311361210802983171167622.8
3201832439847752445413716612911641235802947101194823.3
42017323807411074142412253782106611348661027151111821.7
520163244378610722344130651105111911958501009201164722.8
6201532365842137335351318459411461217834987161167822.8
72014323808071362847121293285812221230829987211156522.6
8201332410804137306591338346912621267863998201198723.4
9201232401757181326711112972956122912358521016131165122.8
102011324007452093149512592450120012078381011211135822.2
112010323997511323225751270265012031214794964131128322.0
122009324297101018254871247245911651185756930141099121.5
13200832476646161333521012462864117011768451000211127922.0
142007323867201725375261243305711651177795960181110421.7
15200632424648159334931181213511241135767942121057720.7
16200532431644912234761172274710991114783967111055620.6
172004324167321117345351268377311791189703870151100021.5
182003324276541813245841198296011101128756954211066620.8
192002324606942217264651270478111481165737951121109721.7
202001313656351210335961120408510081027732959101002420.2

... only showing top 20 rows

In [3]:
// Keep only seasons with Year above 1989 and below 2020, then convert the frame
// to a column-name -> values map, the form handed to lets_plot in later cells.
val mapScoring = dfScoring
    .filter { (it["Year"] lt 2020) AND (it["Year"] gt 1989) }
    .toMap()

// Echo the map as the cell result.
mapScoring
Out[3]:
{Rk=[Ljava.lang.Integer;@49f2b6ca, Year=[Ljava.lang.Integer;@2fd250ed, Tms=[Ljava.lang.Integer;@16d1f77d, RshTD=[Ljava.lang.Integer;@3c945a20, RecTD=[Ljava.lang.Integer;@608cff9e, PR TD=[Ljava.lang.String;@5de49e5a, KR TD=[Ljava.lang.String;@4497e084, FblTD=[Ljava.lang.String;@5a4cf76c, IntTD=[Ljava.lang.Integer;@5d3b93b4, OthTD=[Ljava.lang.String;@2454d007, AllTD=[Ljava.lang.Integer;@5f3c97c9, 2PM=[Ljava.lang.String;@6276fcd5, 2PA=[Ljava.lang.Integer;@63d1751c, XPM=[Ljava.lang.Integer;@61075589, XPA=[Ljava.lang.String;@5a3e14bf, FGM=[Ljava.lang.Integer;@ffbdb79, FGA=[Ljava.lang.String;@67ea360f, Sfty=[Ljava.lang.String;@3c9a8c66, Pts=[Ljava.lang.Integer;@16abeca6, Pts/G=[Ljava.lang.Double;@475add19}
In [4]:
// Base plot: Year on the x axis, Pts on the y axis, drawn on a 640x240 canvas.
val p = lets_plot(mapScoring) {
    x = "Year"
    y = "Pts"
} + ggsize(640, 240)

// Bar heights come straight from the Pts values (identity stat).
// The composed plot is the cell result; it is not stored in a variable.
p + geom_bar(stat = Stat.identity) + ggtitle("Total Points per NFL regular season")
Out[4]:
In [5]:
// Base plot: Year on the x axis, RecTD on the y axis, drawn on a 640x240 canvas.
// Note that this cell declares `p` again, replacing the points plot from the previous cell.
val p = lets_plot(mapScoring) {
    x = "Year"
    y = "RecTD"
} + ggsize(640, 240)

// Bar heights come straight from the RecTD values (identity stat).
p + geom_bar(stat = Stat.identity) + ggtitle("Total Receiving Touchdowns per NFL regular season")
Out[5]:
In [6]:
// Seasons with Year above 1989 and below 2020, plus two derived columns used for binning:
//   YearRange - Year rounded down to a multiple of 5 (e.g. 1997 -> 1995)
//   Years     - display label "<YearRange> - <YearRange + 4>" (e.g. "1995 - 1999")
// NOTE(review): Year is read here via map<Double>, although the toMap() output earlier in this
// notebook lists it as an Integer array - confirm how krangl converts it; integer arithmetic may suffice.
val dfScoringRanges = dfScoring
    .filter { (it["Year"] lt 2020) AND (it["Year"] gt 1989) }
    .addColumn("YearRange") { it["Year"].map<Double>{ floor(it.div(5.0)).times(5).toInt() }}
    .addColumn("Years") { it["YearRange"].map<Int>{ "$it - ${it + 4}" }}

// Per-bucket averages as a column map for plotting: keep the listed columns, group by the
// five-year bucket (YearRange together with its Years label), then average Pts and RecTD
// within each group. removeNA = true is passed to mean() for both columns.
// NOTE(review): select() is given a lambda returning the column names - confirm this is the
// intended krangl overload rather than passing the names directly.
val mapScoringRanges = dfScoringRanges
    .select({ listOf("Year", "Pts", "RecTD", "YearRange", "Years") })
    .groupBy("YearRange", "Years")
    .summarize(
        "mean_Pts" to { it["Pts"].mean(removeNA = true) },
        "mean_RecTD" to { it["RecTD"].mean(removeNA = true) }
    ).toMap()
    
// X-axis category order for the binned charts: one label per five-season bucket,
// "1990 - 1994" through "2015 - 2019", built in the same "<start> - <start + 4>"
// form as the Years column.
val xlimits = (1990..2015 step 5).map { start -> "$start - ${start + 4}" }
In [9]:
// Base plot over the binned averages: bucket label on x, mean_Pts on y, 720x240 canvas.
val p = lets_plot(mapScoringRanges) {
    x = "Years"
    y = "mean_Pts"
} + ggsize(720, 240)

// Identity-stat bars, with the discrete x scale limited to the labels in xlimits.
// The composed plot is the cell result only; `p` itself still holds just the base plot.
p + geom_bar(stat = Stat.identity) + scale_x_discrete(limits = xlimits) +
    ggtitle("Average total points per NFL regular season")
Out[9]:
In [10]:
// Export the binned-points chart to avg_points_binned.png. The layers are rebuilt here because
// `p` holds only the base plot; the path shown in the cell output is where the file was written.
ggsave(
    p + geom_bar(stat = Stat.identity) + scale_x_discrete(limits = xlimits) +
        ggtitle("Average total points per NFL regular season"),
    "avg_points_binned.png"
)
Out[10]:
E:\CFNine\winprobability\scraping\lets-plot-images\avg_points_binned.png
In [11]:
// Base plot over the binned averages: bucket label on x, mean_RecTD on y, 720x240 canvas.
val p2 = lets_plot(mapScoringRanges) {
    x = "Years"
    y = "mean_RecTD"
} + ggsize(720, 240)

// Identity-stat bars, with the discrete x scale limited to the labels in xlimits.
// The composed plot is the cell result only; `p2` itself still holds just the base plot.
p2 + geom_bar(stat = Stat.identity) + scale_x_discrete(limits = xlimits) +
    ggtitle("Average Receiving Touchdowns per NFL regular season")
Out[11]:
In [12]:
// Export the binned receiving-touchdown chart to avg_rectd_binned.png. The layers are rebuilt here
// because `p2` holds only the base plot; the path shown in the cell output is where the file was written.
ggsave(
    p2 + geom_bar(stat = Stat.identity) + scale_x_discrete(limits = xlimits) +
        ggtitle("Average Receiving Touchdowns per NFL regular season"),
    "avg_rectd_binned.png"
)
Out[12]:
E:\CFNine\winprobability\scraping\lets-plot-images\avg_rectd_binned.png
In [ ]:

In [ ]: