Let's find the top individual series by individual shot attempts.

In [1]:
import PbPMethods2 as pm2
import pandas as pd
In [2]:
dfs = []
for season in range(2007, 2017):
    dfs.append(pd.read_csv(pm2.get_gamebygame_data_filename(season)))
dfs = pd.concat(dfs)
dfs.head()
Out[2]:
Player Team Pos Game Season Date TOION(60s) CFON CAON TOIOFF(60s) ... NZS OZS iG iCF iA1 iA2 SACFON SACAON SACFOFF SACAOFF
0 Patrick O'Sullivan L.A F 20001 2007 29-Sep-07 0.110556 3 4 0.457500 ... 2 2 0 1 0 0 3.324 3.554 8.560 22.067
1 Tom Preissing L.A D 20001 2007 29-Sep-07 0.154444 5 7 0.413611 ... 4 2 0 1 0 0 5.430 6.509 6.454 19.112
2 Lubomir Visnovsky L.A D 20001 2007 29-Sep-07 0.185833 2 9 0.382222 ... 5 3 0 0 0 0 2.077 8.354 9.807 17.267
3 Raitis Ivanans L.A F 20001 2007 29-Sep-07 0.059722 1 2 0.508333 ... 2 0 0 0 0 0 1.120 1.826 10.764 23.795
4 Dustin Brown L.A F 20001 2007 29-Sep-07 0.163889 4 9 0.404167 ... 3 2 0 1 0 0 4.228 8.040 7.656 17.581

5 rows × 35 columns

Filter for the playoffs and add a variable with the round number

In [3]:
playoffs = dfs.query('Game > 30000')
playoffs = playoffs[['Player', 'Team', 'Game', 'Season', 'TOION(60s)', 'iCF']]
playoffs['Round'] = (playoffs['Game'] - 30000) // 100
playoffs.head()
Out[3]:
Player Team Game Season TOION(60s) iCF Round
44227 Alex Kovalev MTL 30111 2007 0.205278 4 1
44228 Steve Begin MTL 30111 2007 0.134444 1 1
44229 Maxim Lapierre MTL 30111 2007 0.183611 6 1
44230 Mike Komisarek MTL 30111 2007 0.214722 3 1
44231 Mark Streit MTL 30111 2007 0.201111 3 1

Group by round to get gp and counts

In [4]:
player_gp = playoffs.groupby(['Player', 'Team', 'Season', 'Round']).count()
player_gp.reset_index(inplace = True)
player_gp.rename(columns = {'Game': 'GP'}, inplace = True)
player_gp = player_gp[['Player', 'Team', 'Season', 'Round', 'GP']]
player_gp.head()
Out[4]:
Player Team Season Round GP
0 Aaron Ekblad FLA 2015 1 6
1 Aaron Rome CBJ 2008 1 1
2 Aaron Rome DAL 2013 1 1
3 Aaron Rome VAN 2009 1 1
4 Aaron Rome VAN 2010 1 2
In [5]:
player_rounds = playoffs.groupby(['Player', 'Team', 'Season', 'Round']).sum()
player_rounds.drop(['Game'], axis = 1, inplace = True)
player_rounds.reset_index(inplace = True)

#Join to get the gp column
player_rounds = player_rounds.merge(player_gp, on = ['Player', 'Team', 'Season', 'Round'], how = 'inner')

player_rounds['iCF60'] = player_rounds['iCF'] / player_rounds['TOION(60s)']
player_rounds.sort_values(by = 'iCF60', ascending = False, inplace = True)
player_rounds.head()
Out[5]:
Player Team Season Round TOION(60s) iCF GP iCF60
4533 Patrick Wiercioch OTT 2012 1 0.019167 1 1 52.173913
5605 Tomas Kopecky CHI 2010 1 0.039444 2 1 50.704225
1904 Emerson Etem ANA 2013 1 0.225000 10 2 44.444444
170 Alexander Semin WSH 2009 1 1.550556 62 7 39.985668
337 Andrew Shaw CHI 2013 2 0.050278 2 1 39.779006

Filter for at least six games (optional)

In [6]:
temp = player_rounds.query('GP >= 6')
temp.reset_index(inplace = True, drop = True) #so index = ranks
temp.head(20)
Out[6]:
Player Team Season Round TOION(60s) iCF GP iCF60
0 Alexander Semin WSH 2009 1 1.550556 62 7 39.985668
1 Alex Ovechkin WSH 2008 1 1.748056 58 7 33.179724
2 Patric Hornqvist NSH 2010 1 1.167778 37 6 31.684110
3 Alex Ovechkin WSH 2009 1 1.751944 54 7 30.822895
4 Max Pacioretty MTL 2016 1 1.510833 46 6 30.446773
5 Alex Ovechkin WSH 2014 1 1.937778 57 7 29.415138
6 Alex Ovechkin WSH 2012 1 1.839167 54 7 29.361124
7 Alexander Semin WSH 2007 1 1.583333 46 7 29.052632
8 Eric Fehr WSH 2009 1 1.105556 32 7 28.944724
9 Max Pacioretty MTL 2014 2 1.493333 43 6 28.794643
10 Phil Kessel PIT 2015 4 1.576111 45 6 28.551287
11 Alex Ovechkin WSH 2008 2 2.234444 63 7 28.194928
12 Patrick Sharp CHI 2012 4 1.623333 44 6 27.104723
13 Brandon Bollig CGY 2014 1 0.671944 18 6 26.787929
14 Craig Smith NSH 2014 1 1.648611 44 6 26.689132
15 Mikael Samuelsson DET 2007 1 1.242500 33 6 26.559356
16 Shawn Thornton BOS 2007 1 0.909167 24 7 26.397800
17 Henrik Zetterberg DET 2007 4 1.705556 45 6 26.384365
18 Alex Ovechkin WSH 2014 2 1.898611 50 7 26.335040
19 Alexander Semin WSH 2008 1 1.457778 38 7 26.067073