Let's find the top individual series by shot attempts.

In [1]:
import PbPMethods2 as pm2
import pandas as pd
In [2]:
dfs = []
for season in range(2007, 2017):
    dfs.append(pd.read_csv(pm2.get_gamebygame_data_filename(season)))
dfs = pd.concat(dfs)
dfs.head()
Out[2]:
Player Team Pos Game Season Date TOION(60s) CFON CAON TOIOFF(60s) ... NZS OZS iG iCF iA1 iA2 SACFON SACAON SACFOFF SACAOFF
0 Patrick O'Sullivan L.A F 20001 2007 29-Sep-07 0.110556 3 4 0.457500 ... 2 2 0 1 0 0 3.324 3.554 8.560 22.067
1 Tom Preissing L.A D 20001 2007 29-Sep-07 0.154444 5 7 0.413611 ... 4 2 0 1 0 0 5.430 6.509 6.454 19.112
2 Lubomir Visnovsky L.A D 20001 2007 29-Sep-07 0.185833 2 9 0.382222 ... 5 3 0 0 0 0 2.077 8.354 9.807 17.267
3 Raitis Ivanans L.A F 20001 2007 29-Sep-07 0.059722 1 2 0.508333 ... 2 0 0 0 0 0 1.120 1.826 10.764 23.795
4 Dustin Brown L.A F 20001 2007 29-Sep-07 0.163889 4 9 0.404167 ... 3 2 0 1 0 0 4.228 8.040 7.656 17.581

5 rows × 35 columns

Filter for the playoffs and add a variable with the round number

In [3]:
playoffs = dfs.query('Game > 30000')
playoffs = playoffs[['Player', 'Team', 'Game', 'Season', 'TOION(60s)', 'CFON', 'CAON']]
playoffs['Round'] = (playoffs['Game'] - 30000) // 100
playoffs.head()
Out[3]:
Player Team Game Season TOION(60s) CFON CAON Round
44227 Alex Kovalev MTL 30111 2007 0.205278 15 12 1
44228 Steve Begin MTL 30111 2007 0.134444 5 8 1
44229 Maxim Lapierre MTL 30111 2007 0.183611 12 15 1
44230 Mike Komisarek MTL 30111 2007 0.214722 19 13 1
44231 Mark Streit MTL 30111 2007 0.201111 14 15 1

Group by round to get gp and counts

In [4]:
player_gp = playoffs.groupby(['Player', 'Team', 'Season', 'Round']).count()
player_gp.reset_index(inplace = True)
player_gp.rename(columns = {'Game': 'GP'}, inplace = True)
player_gp = player_gp[['Player', 'Team', 'Season', 'Round', 'GP']]
player_gp.head()
Out[4]:
Player Team Season Round GP
0 Aaron Ekblad FLA 2015 1 6
1 Aaron Rome CBJ 2008 1 1
2 Aaron Rome DAL 2013 1 1
3 Aaron Rome VAN 2009 1 1
4 Aaron Rome VAN 2010 1 2
In [5]:
player_rounds = playoffs.groupby(['Player', 'Team', 'Season', 'Round']).sum()
player_rounds.drop(['Game'], axis = 1, inplace = True)
player_rounds.reset_index(inplace = True)

#Join to get the gp column
player_rounds = player_rounds.merge(player_gp, on = ['Player', 'Team', 'Season', 'Round'], how = 'inner')

player_rounds['CF60'] = player_rounds['CFON'] / player_rounds['TOION(60s)']
player_rounds['CA60'] = player_rounds['CAON'] / player_rounds['TOION(60s)']
player_rounds['CD60'] = player_rounds['CF60'] - player_rounds['CA60']
player_rounds.sort_values(by = 'CD60', ascending = False, inplace = True)
player_rounds.head()
Out[5]:
Player Team Season Round TOION(60s) CFON CAON GP CF60 CA60 CD60
3253 Lars Eller MTL 2012 1 0.100556 15 2 1 149.171271 19.889503 129.281768
4533 Patrick Wiercioch OTT 2012 1 0.019167 3 1 1 156.521739 52.173913 104.347826
337 Andrew Shaw CHI 2013 2 0.050278 6 1 1 119.337017 19.889503 99.447514
4815 Robert Bortuzzo STL 2015 2 0.195833 20 3 1 102.127660 15.319149 86.808511
2569 Jimmy Hayes CHI 2011 1 0.334722 37 9 2 110.539419 26.887967 83.651452

Filter for at least six games (optional)

In [6]:
temp = player_rounds.query('GP >= 6')
temp.reset_index(inplace = True, drop = True) #so index = ranks
temp.head(20)
Out[6]:
Player Team Season Round TOION(60s) CFON CAON GP CF60 CA60 CD60
0 Andre Burakovsky WSH 2016 2 1.198889 111 43 6 92.585728 35.866543 56.719184
1 Jiri Hudler DET 2007 1 0.895556 77 32 6 85.980149 35.732010 50.248139
2 Henrik Sedin VAN 2014 1 1.407222 112 43 6 79.589420 30.556652 49.032767
3 Henrik Zetterberg DET 2007 4 1.705556 154 71 6 90.293160 41.628664 48.664495
4 Marc-Andre Bergeron T.B 2010 3 1.525556 127 56 7 83.248361 36.707939 46.540422
5 Chris Chelios DET 2007 1 0.940278 73 30 6 77.636632 31.905465 45.731167
6 Chris Kunitz PIT 2010 1 1.165278 97 45 6 83.241955 38.617402 44.624553
7 Mikael Samuelsson DET 2007 1 1.242500 101 46 6 81.287726 37.022133 44.265594
8 Daniel Sedin VAN 2014 1 1.383611 107 46 6 77.333869 33.246336 44.087533
9 Darren Helm DET 2007 4 0.639722 40 12 6 62.527139 18.758142 43.768997
10 Sidney Crosby PIT 2009 1 1.568056 135 67 6 86.093888 42.728078 43.365810
11 Lars Eller WSH 2016 2 1.131667 103 54 6 91.016200 47.717231 43.298969
12 Mike Green WSH 2009 1 1.993056 192 108 7 96.334495 54.188153 42.146341
13 Pavel Datsyuk DET 2008 2 1.853056 133 55 7 71.773347 29.680708 42.092640
14 Brian Rafalski DET 2007 4 1.958056 161 79 6 82.224429 40.346148 41.878281
15 Joe Pavelski S.J 2009 1 1.516111 126 63 6 83.107365 41.553683 41.553683
16 Brett Lebda DET 2007 4 1.305000 95 41 6 72.796935 31.417625 41.379310
17 Jiri Hudler DET 2007 4 0.945556 64 25 6 67.685076 26.439483 41.245593
18 Tomas Holmstrom DET 2008 2 1.370000 97 42 7 70.802920 30.656934 40.145985
19 Ryane Clowe S.J 2009 1 1.588611 131 68 6 82.461969 42.804686 39.657283