The League of Champions
Analyzing the return of investments made by English Premier League Clubs on their player purchases
Team : [ Natarajan Chakrapani , Kuldeep Kapade , Mark Davidoff ]
from IPython.core.display import Image
Image(filename='/Users/natarajan/Dropbox/Soccer - WWOD/Player_positions.png')
Image('http://www.soccer-training-guide.com/images/wikipedia_positions.png') #all player positions on the field
Image('http://www.soccer-training-guide.com/images/4-4-2-new.jpg')
def get_players_data(players, rows):
'''helper function to parse player data on who were transferred in the English premier league '''
for row in rows:
players_data = { }
for (i,item) in enumerate(row.find_all('td')):
#print i, item.get_text()
if i == 3:
players_data['p_name'] = item.get_text()
parts = item.find('a').get('href').split('/')
players_data['handle'] = parts[2]
players_data['id'] = parts[4].split('_')[1].split('.')[0]
if i == 4:
players_data['club'] = item.get_text()
if i == 5:
players_data['age'] = item.get_text()
if i == 7:
players_data['position'] = item.get_text()
if i == 9:
players_data['from'] = item.get_text()
if i == 11:
players_data['to'] = item.get_text()
if i == 12:
fee = item.get_text().split(' ')
players_data['transfer_fee_pounds'] = fee[0]
players_data['transfer_fee_euros'] = fee[1][1:]
players.append(players_data)
def fetch_data(players, year):
''' given a particular year , fetch transferred player data from English premier league'''
url = 'http://www.transfermarkt.co.uk/en/premier-league/transferrekorde/wettbewerb_GB1_%s_default_default_default_alle.html' % (year)
r = requests.get(url)
html_doc = r.text
soup = BeautifulSoup(html_doc)
table= soup.find('table' , {'class':"tabelle_grafik"})
rows = table.find_all('tr' , {'class':"hell"})
get_players_data(players, rows)
rows = table.find_all('tr' , {'class':"dunkel"})
get_players_data(players, rows[1:])
def extend_data(players, new_players, year):
for player in players:
url = 'http://www.transfermarkt.co.uk/en/%s/leistungsdaten/spieler_%s_%s.html' % (player['handle'], player['id'], year)
r = requests.get(url)
html_doc = r.text
soup = BeautifulSoup(html_doc)
table = soup.find('table' , {'class':"standard_tabelle"})
row = table.find('tr' , {'class':"hell"}) #Assuming its in first row
for (i,item) in enumerate(row.find_all('td')):
#print i, item.get_text()
if i == 2:
if item.get_text() != '-':
player['matches'] = item.get_text()
else:
player['matches'] = 0
if i == 3:
if item.get_text() != '-':
player['goals'] = item.get_text()
else:
player['goals'] = 0
if i == 5:
if item.get_text() != '-':
player['assists'] = item.get_text()
else:
player['assists'] = 0
if i == 11:
if item.get_text() != '-':
player['minutes_per_goal'] = item.get_text()
else:
player['minutes_per_goal'] = 0
if i == 12:
if item.get_text() != '-':
player['minutes'] = item.get_text()
else:
player['minutes'] = 0
new_players.append(player)
# dump data from source to pkl files - to be run just once
import pickle,requests
from bs4 import BeautifulSoup
import requests , re
players = []
years = ['2010', '2011' , '2012']
for year in years :
#year = '2012'
fetch_data(players, year)
new_players = []
extend_data(players, new_players, year)
f_name = "players_data_" + year +".pkl"
pickle.dump( new_players, open( f_name, "wb" ) )
#print new_players[0]
#pickle.dump( new_players, open( "players_data_2012.pkl", "wb" ) )
{'handle': 'eden-hazard', 'minutes': u'2372', 'from': u'Lille', 'club': u'Chelsea', 'transfer_fee_pounds': u'35.000.000', 'age': u'21', 'matches': u'31', 'transfer_fee_euros': u'40.000.000', 'minutes_per_goal': u'264', 'to': u'Chelsea', 'assists': u'12', 'goals': u'9', 'position': u'LW', 'p_name': u'Eden Hazard', 'id': '50202'}
# load data from pkl files to a dataframe
import numpy as np
import pandas as pd
import pickle,matplotlib.pyplot as plt
from pandas import Series, DataFrame
f = open('players_data_2010.pkl')
#f = open(cloud.files.get('players_data_2010.pkl') )
p = pickle.load(f)
player_df_2010 = DataFrame(p)
player_df_2010['season']='10/11'
f = open('players_data_2011.pkl')
p = pickle.load(f)
player_df_2011 = DataFrame(p)
player_df_2011['season']='11/12'
f = open('players_data_2012.pkl')
p = pickle.load(f)
player_df_2012 = DataFrame(p)
player_df_2012['season']='12/13'
player_df = pd.concat([player_df_2010,player_df_2011,player_df_2012])
print player_df.columns
player_df.set_index('id',inplace=True,drop=False)
print player_df[:1]
# data cleanup - converting strings to numerics for the appropriate fields
player_df['transfer_fee_euros'] = player_df.transfer_fee_euros.str.replace('.','')
player_df['transfer_fee_euros'] =player_df.transfer_fee_euros.astype(float64)
player_df['minutes'] = player_df.minutes.astype(float64)
player_df['goals'] = player_df.goals.astype(float64)
player_df['age'] = player_df.age.astype(float64)
player_df['assists'] = player_df.assists.astype(float64)
player_df.set_index(['id'], inplace=True, drop=False)
player_df['money_by_minutes']= player_df['transfer_fee_euros']/player_df['minutes'].astype(float64)
player_df['money_by_goal_minutes'] = player_df['transfer_fee_euros']/player_df['minutes_per_goal'].astype(float64)
player_df['money_by_assists'] = player_df['transfer_fee_euros']/player_df['assists'].astype(float64)
player_df['money_by_goals'] = player_df['transfer_fee_euros']/player_df['goals'].astype(float64)
player_df['money_by_matches'] = player_df['transfer_fee_euros']/player_df['matches'].astype(float64)
# only consider player purchases done by teams from English premier league
epl_teams = [ u'Southampton', u'Aston Villa', u'Man Utd', u'Swansea', u'Liverpool', u'Blackburn', u'Everton', u'Fulham', u'Newcastle', u'West Ham', u'QPR', u'Wolves', u'Chelsea', u'Spurs', u'Man City', u'Stoke City', u'Arsenal', u'Sunderland', u'Wigan', u'Birmingham', u'West Brom']
epl_to=[]
for team in epl_teams:
epl_to.append( player_df[player_df.to == team])
player_df = pd.concat(epl_to)
#print len(player_df)
player_df = player_df.sort_index(by='season')
#print player_df
Index([age, assists, club, from, goals, handle, id, matches, minutes, minutes_per_goal, p_name, position, to, transfer_fee_euros, transfer_fee_pounds, season], dtype=object) age assists club from goals handle id matches minutes \ id 7767 26 6 Chelsea Liverpool 10 fernando-torres 7767 37 2660 minutes_per_goal p_name position to transfer_fee_euros \ id 7767 266 Fernando Torres CF Chelsea 58.500.000 transfer_fee_pounds season id 7767 51.500.000 10/11
/Library/Python/2.7/site-packages/matplotlib-1.1.0-py2.7-macosx-10.7-intel.egg/pytz/__init__.py:35: UserWarning: Module pytz was already imported from /Library/Python/2.7/site-packages/matplotlib-1.1.0-py2.7-macosx-10.7-intel.egg/pytz/__init__.pyc, but /Library/Python/2.7/site-packages/pytz-2011k-py2.7.egg is being added to sys.path from pkg_resources import resource_stream
# consider only players transferred to EPL teams . Disregard players transferred to non EPL teams.
epl_teams = [ u'Southampton', u'Aston Villa', u'Man Utd', u'Swansea', u'Liverpool', u'Blackburn', u'Everton', u'Fulham', u'Newcastle', u'West Ham', u'QPR', u'Wolves', u'Chelsea', u'Spurs', u'Man City', u'Stoke City', u'Arsenal', u'Sunderland', u'Wigan', u'Birmingham', u'West Brom']
defence = ['SW' , 'CB' , 'LB' , 'RB']
midfield = ['DM', 'CM', 'RM','LM', 'AM', 'LW', 'RW']
striker = ['LW', 'RW', 'SS', 'CF']
attack ={}
def plot_acquisitions_by_positions(pos_list,label):
d = DataFrame(index=epl_teams)
for season in player_df.season.unique():
for team in epl_teams:
if label=='Midfield':
attack[team]=len(player_df[(player_df.season==season) & (player_df['position'].isin(pos_list) | ( player_df['id'].isin(['7858','14086','4063','15921','50202','15921','35251'])) ) & (player_df.to ==team)])
elif label== 'Attack':
attack[team]=len(player_df[(player_df.season==season) & ((player_df['position'].isin(pos_list)) | ( player_df['id'].isin(['31552','15185'])))& (player_df.to ==team)])
elif label =='Defence':
#print team,season, len(player_df[(player_df.season==season) & (player_df['position'].isin(['SW' , 'CB' ,'LB', 'RB'])) & (player_df.to ==team)][['p_name','position']])
attack[team]= len(player_df[(player_df.season==season) & (player_df['position'].isin(pos_list)) & (player_df.to ==team)])
g=Series(attack)
g.name= label +"_"+ season
d=d.join(g)
print d.plot(kind='barh', figsize=(10,10),title='trend of ' + label + ' player acquistions')
plot_acquisitions_by_positions(defence,'Defence')
plot_acquisitions_by_positions(midfield,'Midfield')
plot_acquisitions_by_positions(striker,'Attack')
#.plot(kind='barh',figsize=(5,10))
#Series().plot(kind='barh')
#print len(player_df[(player_df.season=='11/12') & (player_df['position'].isin(['SW' , 'CB' ,'LB', 'RB'])) & (player_df.to =='Arsenal')][['p_name','position']])
#print player_df[player_df['position'].isin(['SW' , 'CB' ,'LB', 'RB'])][['pos_type']]
Axes(0.125,0.125;0.775x0.775) Axes(0.125,0.125;0.775x0.775) Axes(0.125,0.125;0.775x0.775)
#pieces = DataFrame()
#print player_df.groupby(['to','season'])['transfer_fee_euros'].mean()
for season in player_df.season.unique():
# print season
p1 = player_df[player_df.season==season].groupby(['to'])['transfer_fee_euros'].mean()
p1.name = season
#print p1
#print p1
if season == '10/11':
club_purchase = DataFrame(p1)
else:
club_purchase = club_purchase.join(p1)
club_purchase = club_purchase.fillna(value=0)
#position_compare.sort_index(
#position_compare = position_compare.transpose()
print club_purchase.sort_index(ascending=False).plot(kind='barh')
Axes(0.125,0.125;0.775x0.775)
# get unique players who have been transferred more than once across seasons from 2010 to 2013
transfr_more_than_once = player_df.ix[player_df.id.value_counts() > 1 ].sort_index(by=['id','season'])['id'].unique()
print "Players who were transferred between the EPL teams more than once"
# list of all players who have been transferred more than once from 2010 -2013
print player_df.ix[transfr_more_than_once].sort_index(by=['id','season'])[['p_name','from','to','transfer_fee_euros','position','season','goals']]
# graph player transfer amount rise/fall through the 3 seasons.
def transfr_amt_by_season(p_id):
return player_df[(player_df.id==p_id)][['season','transfer_fee_euros']].set_index(keys='season')
name_df = DataFrame(index=['10/11','11/12','12/13'])
for item in np.unique(transfr_more_than_once):
#print item
#print np.unique(player_df.ix[item]['name'])
name_df[np.unique(player_df.ix[item]['p_name'])] = transfr_amt_by_season(item)
#name_df= name_df.where(pd.notnull(name_df),0)
name_df= name_df.fillna(method='ffill') # assuming the cost to club is same , if club retains a player for successive seasons
print " \n\n Tabulate player acquisition costs for players who were transferred more than once in the last 3 EPL seasons"
print name_df
print "\n\n plot of player acquisition costs for players who were transferred more than once in the last 3 EPL seasons"
print name_df.plot(figsize=(10,7))
Players who were transferred between the EPL teams more than once p_name from to transfer_fee_euros position season \ id 13168 Raul Meireles FC Porto Liverpool 13000000 CM 10/11 13168 Raul Meireles Liverpool Chelsea 13500000 CM 11/12 19368 Moussa Dembélé AZ Alkmaar Fulham 6000000 CM 10/11 19368 Moussa Dembélé Fulham Spurs 19000000 CM 12/13 35099 Steven Fletcher Burnley Wolves 7200000 CF 10/11 35099 Steven Fletcher Wolves Sunderland 15200000 CF 12/13 goals id 13168 5 13168 2 19368 3 19368 1 35099 10 35099 11 Tabulate player acquisition costs for players who were transferred more than once in the last 3 EPL seasons Raul Meireles Moussa Dembélé Steven Fletcher 10/11 13000000 6000000 7200000 11/12 13500000 6000000 7200000 12/13 13500000 19000000 15200000 plot of player acquisition costs for players who were transferred more than once in the last 3 EPL seasons Axes(0.125,0.125;0.775x0.775)
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas import Series, DataFrame
#player_df.transfer_fee_euros.type
#player_df.position
player_df.sort_index(by='transfer_fee_euros')[::][["p_name", "club",'transfer_fee_euros','from','to']]
grouped= player_df.groupby('club').size()
#grouped.order(ascending=True).plot()
club_counts = player_df['club'].value_counts()
#print club_counts
#club_counts.plot(kind='barh', rot=0)
pos_counts = player_df['position'].value_counts() # players acquired by position - raw count
print " Distribution of players purchased by position"
print pos_counts
print "Total players purchased " , len(player_df)
# plot positions in demand
print player_df.position.value_counts().plot(kind='bar', figsize=(5,5), title = 'positions in demand')
#player_df.groupby('position')['transfer_fee_euros'].sum().plot(title='total price by position')
position_compare = DataFrame()
#print player_df.season.unique()
#pieces = DataFrame()
for season in player_df.season.unique():
p1 = player_df[player_df.season==season].groupby(['position'])['transfer_fee_euros'].mean()
p1.name = season
if season == '10/11':
position_compare = DataFrame(p1)
else:
position_compare = position_compare.join(p1)
position_compare= position_compare.where(pd.notnull(position_compare),0)
print position_compare
print "\n\n\n"
print position_compare.plot(kind='barh', title='seasonal trend for mean price paid by position')
#player_df[player_df.season=='10/11'].groupby(['position'])['transfer_fee_euros'].mean().plot(title='avg price by position' , kind='barh', rot=0) # shows high price paid for Strikers CF
#pos_counts.plot(kind='barh', rot=0)
#player_df['transfer_fee_euros'].hist() # distribution of money spent
Distribution of players purchased by position CF 32 CB 18 CM 15 AM 15 RW 10 LW 8 DM 8 LB 6 GK 5 SS 2 RM 1 RB 1 LM 1 Total players purchased 122 Axes(0.125,0.125;0.775x0.775) 10/11 11/12 12/13 position AM 13862500.000000 16100000.000000 17033333.333333 CB 11385714.285714 10542857.142857 12675000.000000 CF 22400000.000000 12830000.000000 12905000.000000 CM 17750000.000000 9957142.857143 13375000.000000 DM 7225000.000000 9000000.000000 12665000.000000 GK 6000000.000000 14475000.000000 12600000.000000 LB 22700000.000000 7100000.000000 10000000.000000 LM 4800000.000000 0.000000 0.000000 LW 7000000.000000 20400000.000000 15460000.000000 RM 22000000.000000 0.000000 0.000000 RW 6533333.333333 12900000.000000 10100000.000000 Axes(0.125,0.125;0.775x0.775)
#t1[seasons].sort_index(ascending=False).plot(figsize(5,5),kind='barh', title='season '+ seasons+ ':fee by ' + col , rot=0)
# grouped = player_df[player_df.season=='10/11'].groupby(['to'])
position_compare = DataFrame()
#print player_df.season.unique()
#pieces = DataFrame()
stats_for_money = ['goals', 'minutes','assists' ]
def return_per_euro(stat):
for season in player_df.season.unique():
grouped = player_df[player_df.season==season].groupby(['to'])
#print grouped
#p1= grouped['transfer_fee_euros'].sum()/grouped['minutes'].sum()
p1= grouped['transfer_fee_euros'].sum()/grouped[stat].sum()
p1.name = season
if season == '10/11':
euros_per_stat = DataFrame(p1)
else:
euros_per_stat = euros_per_stat.join(p1)
euros_per_stat = euros_per_stat.fillna(value=0)
print euros_per_stat
print euros_per_stat.plot(kind='barh', title='euros spent by ' + stat)
return_per_euro('goals')
#for item in stats_for_money:
# return_per_euro(item)
10/11 11/12 12/13 to Arsenal 6.333333e+06 4.173333e+06 1.606061e+06 Aston Villa 2.200000e+06 5.400000e+06 5.866667e+05 Birmingham 2.685714e+06 0.000000e+00 0.000000e+00 Chelsea 8.233333e+06 4.132609e+06 3.885714e+06 Fulham 2.000000e+06 4.000000e+06 0.000000e+00 Liverpool 3.907955e+06 1.304000e+07 7.162500e+06 Man City 6.516071e+06 3.181034e+06 1.940000e+07 Man Utd 2.253846e+06 8.185714e+06 1.831944e+06 Newcastle 6.000000e+06 1.742308e+06 inf Spurs 1.428571e+06 inf 4.000000e+06 Stoke City 1.077778e+06 2.030000e+06 0.000000e+00 Sunderland 1.769231e+06 3.900000e+06 1.737500e+06 West Ham inf 0.000000e+00 4.750000e+06 Wigan inf 0.000000e+00 0.000000e+00 Wolves 7.200000e+05 inf 0.000000e+00 Axes(0.125,0.125;0.775x0.775)
return_per_euro('assists')
10/11 11/12 12/13 to Arsenal inf 6.955556e+06 2.304348e+06 Aston Villa 1.870000e+07 1.800000e+06 2.200000e+06 Birmingham 4.700000e+06 0.000000e+00 0.000000e+00 Chelsea 2.058333e+07 3.394643e+06 3.885714e+06 Fulham 2.000000e+06 2.000000e+06 0.000000e+00 Liverpool 5.057353e+06 4.075000e+06 9.550000e+06 Man City 7.932609e+06 3.843750e+06 inf Man Utd 9.766667e+06 4.092857e+06 2.536538e+06 Newcastle inf 1.887500e+06 8.250000e+06 Spurs 2.222222e+06 3.100000e+06 4.857143e+06 Stoke City 4.850000e+06 4.060000e+06 0.000000e+00 Sunderland 3.285714e+06 inf 2.316667e+06 West Ham inf 0.000000e+00 4.750000e+06 Wigan inf 0.000000e+00 0.000000e+00 Wolves 2.400000e+06 4.000000e+06 0.000000e+00 Axes(0.125,0.125;0.775x0.775)
return_per_euro('minutes')
10/11 11/12 12/13 to Arsenal 4282.172639 10059.456854 6938.989264 Aston Villa 8831.168831 5209.840810 3393.752410 Birmingham 8675.588371 0.000000 0.000000 Chelsea 20791.245791 9681.197800 11403.416833 Fulham 3121.748179 5280.528053 0.000000 Liverpool 13323.260499 6048.237477 14260.826282 Man City 13798.971411 11729.179911 15875.613748 Man Utd 11044.101018 15549.525102 8805.073431 Newcastle 35087.719298 3766.838517 6729.200653 Spurs 6056.935191 2513.173895 7383.279045 Stoke City 3489.208633 5671.975412 0.000000 Sunderland 7219.083490 5884.571860 5637.801663 West Ham 7805.724198 0.000000 3981.559095 Wigan 14949.494949 0.000000 0.000000 Wolves 5270.863836 3394.145100 0.000000 Axes(0.125,0.125;0.775x0.775)
#### An alternate view of comparing the clubs by season on various returns
def compare_stat(col):
''' takes column name from players_df dataframe and computes transfr_amount/col name '''
#col = 'minutes'
#Money spent each season by clubs on player acquisitions
grouped = player_df[player_df.season=='10/11'].groupby(['to'])
# money paid per minute on field time
t1 = (grouped['transfer_fee_euros'].sum()/ grouped[col].sum())
t1.name = '10/11'
t1 = DataFrame(t1)
grouped = player_df[player_df.season=='11/12'].groupby(['to'])
t2 = grouped['transfer_fee_euros'].sum()/ grouped[col].sum()
t2.name='11/12'
grouped = player_df[player_df.season=='12/13'].groupby(['to'])
t3 = grouped['transfer_fee_euros'].sum()/ grouped[col].sum()
t3.name='12/13'
t1= t1.join(t2)
t1= t1.join(t3)
t1= t1.where(pd.notnull(t1),0)
#print t1
fig = plt.figure()
fig.set_size_inches(20,20)
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
ax3 = fig.add_subplot(2, 2, 3)
i=1
for seasons in t1.columns:
#print seasons
x = fig.add_subplot(2,2,i)
t1[seasons].sort_index(ascending=False).plot(figsize(5,5),kind='barh', title='season '+ seasons+ ':fee by ' + col , rot=0)
i+=1
fig
t1= t1.transpose()
t1.plot(figsize(15,15))
# other parameters that can be passed - minutes , assists
compare_stat('goals')
stat = 'money_by_goals'
x = player_df.groupby('season').sort_index(by=stat)[['p_name','from', 'to', 'position','goals', 'transfer_fee_euros' , stat ]]
for season in player_df.season.unique():
print "\n\ntop 5 player acquisitions per goal scored in " + season
print x.xs(season)[:5]
top 5 player acquisitions for goal scored in 10/11 p_name from to position goals \ id 50935 Chicharito Guadalajara Man Utd CF 13 35099 Steven Fletcher Burnley Wolves CF 10 4192 Rafael van der Vaart Real Madrid Spurs AM 13 28245 Kenwyne Jones Sunderland Stoke City CF 9 13239 Darren Bent Sunderland Aston Villa CF 17 transfer_fee_euros money_by_goals id 50935 7500000 576923.076923 35099 7200000 720000.000000 4192 10000000 769230.769231 28245 9700000 1077777.777778 13239 21500000 1264705.882353 top 5 player acquisitions for goal scored in 11/12 p_name from to position goals transfer_fee_euros \ id 37304 Shane Long Reading West Brom CF 8 5100000 25742 Nikica Jelavic Rangers Everton CF 9 6600000 4205 Bobby Zamora Fulham QPR CF 7 5800000 3966 Djibril Cissé Lazio QPR CF 6 5000000 88755 Kevin de Bruyne KRC Genk Chelsea AM 8 8000000 money_by_goals id 37304 637500.000000 25742 733333.333333 4205 828571.428571 3966 833333.333333 88755 1000000.000000 top 5 player acquisitions for goal scored in 12/13 p_name from to position goals \ id 40205 Demba Ba Newcastle Chelsea CF 15 50201 Christian Benteke KRC Genk Aston Villa CF 15 82442 Olivier Giroud Montpellier Arsenal CF 11 27577 Clint Dempsey Fulham Spurs SS 6 4380 Robin van Persie Arsenal Man Utd CF 24 transfer_fee_euros money_by_goals id 40205 8500000 566666.666667 50201 8800000 586666.666667 82442 12000000 1090909.090909 27577 7500000 1250000.000000 4380 30700000 1279166.666667
stat = 'money_by_minutes'
x = player_df.groupby('season').sort_index(by=stat)[['p_name','from', 'minutes','to', 'position','goals', 'transfer_fee_euros' , stat ]]
for season in player_df.season.unique():
print "\n\ntop 5 player acquisitions per minute on field in " + season
print x.xs(season)[:5]
top 5 player acquisitions per minute on field in 10/11 p_name from minutes to position goals \ id 19368 Moussa Dembélé AZ Alkmaar 1922 Fulham CM 3 28245 Kenwyne Jones Sunderland 2780 Stoke City CF 9 5293 Sébastien Squillaci Sevilla FC 1820 Arsenal CB 1 4192 Rafael van der Vaart Real Madrid 2221 Spurs AM 13 76277 Laurent Koscielny Lorient 2617 Arsenal CB 2 transfer_fee_euros money_by_minutes id 19368 6000000 3121.748179 28245 9700000 3489.208633 5293 6500000 3571.428571 4192 10000000 4502.476362 76277 12500000 4776.461597 top 5 player acquisitions per minute on field in 11/12 p_name from minutes to position goals \ id 29434 Yohan Cabaye Lille 2821 Newcastle CM 4 37304 Shane Long Reading 2186 West Brom CF 8 4205 Bobby Zamora Fulham 2361 QPR CF 7 3785 Scott Parker West Ham 2467 Spurs CM 0 35571 José Enrique Newcastle 2994 Liverpool LB 0 transfer_fee_euros money_by_minutes id 29434 5000000 1772.421127 37304 5100000 2333.028362 4205 5800000 2456.586192 3785 6200000 2513.173895 35571 8000000 2672.010688 top 5 player acquisitions per minute on field in 12/13 p_name from minutes to position goals \ id 145988 Wilfried Zaha Crystal Palace 3539 Man Utd RW 6 50201 Christian Benteke KRC Genk 2593 Aston Villa CF 15 81796 Sung-Yong Ki Celtic 1956 Swansea DM 0 40205 Demba Ba Newcastle 2201 Chelsea CF 15 46220 Pablo Hernández Valencia CF 1771 Swansea RW 3 transfer_fee_euros money_by_minutes id 145988 11750000 3320.146934 50201 8800000 3393.752410 81796 6960000 3558.282209 40205 8500000 3861.880963 46220 7000000 3952.569170
stat = 'money_by_goals'
x = player_df.groupby(['season']).sort_index(by=stat,ascending=True)[['p_name','age', 'to', 'position','goals', 'transfer_fee_euros' , stat ]]
#print x[:5]
for season in player_df.season.unique():
print "\n\ntop 5 player acquisitions per goal scored in " + season
# make sure you consider players who scored at least 1 goal
print x[x.goals > 0 ].xs(season).sort('money_by_goals').sort('age', ascending=True)[:5]
top 5 player acquisitions per goal scored in 10/11 p_name age to position goals transfer_fee_euros \ id 45146 Mario Balotelli 20 Man City CF 6 29500000 74590 Sandro 21 Spurs DM 1 10000000 48066 Andy Carroll 22 Liverpool CF 13 41000000 50935 Chicharito 22 Man Utd CF 13 7500000 46741 David Luiz 23 Chelsea CB 2 30000000 money_by_goals id 45146 4916666.666667 74590 10000000.000000 48066 3153846.153846 50935 576923.076923 46741 15000000.000000 top 5 player acquisitions per goal scored in 11/12 p_name age to position goals transfer_fee_euros \ id 143424 Alex Oxlade-Chamberlain 17 Arsenal RW 2 13800000 95435 Connor Wickham 18 Sunderland CF 1 9000000 176485 Lucas Piazón 18 Chelsea AM 4 7500000 117996 Phil Jones 19 Man Utd CB 1 19300000 102427 Sebastián Coates 20 Liverpool CB 1 8000000 money_by_goals id 143424 6900000 95435 9000000 176485 1875000 117996 19300000 102427 8000000 top 5 player acquisitions per goal scored in 12/13 p_name age to position goals transfer_fee_euros \ id 134424 Nick Powell 18 Man Utd CM 1 7500000 80444 Coutinho 20 Liverpool AM 2 10000000 145988 Wilfried Zaha 20 Man Utd RW 6 11750000 85314 Oscar 20 Chelsea AM 2 32000000 50202 Eden Hazard 21 Chelsea LW 9 40000000 money_by_goals id 134424 7500000.000000 80444 5000000.000000 145988 1958333.333333 85314 16000000.000000 50202 4444444.444444
for season in player_df.season.unique():
print "\n\nSeason : " + season + "\n"
print x[x.goals > 0 ].xs(season).sort('money_by_goals').sort('age', ascending=False)[:5]
Season : 10/11 p_name age to position goals transfer_fee_euros \ id 7858 Yossi Benayoun 30 Chelsea LW 1 7000000 5293 Sébastien Squillaci 30 Arsenal CB 1 6500000 19729 Nikola Zigic 29 Birmingham CF 5 7000000 13091 Yaya Touré 27 Man City CM 6 30000000 13168 Raul Meireles 27 Liverpool CM 5 13000000 money_by_goals id 7858 7000000 5293 6500000 19729 1400000 13091 5000000 13168 2600000 Season : 11/12 p_name age to position goals transfer_fee_euros \ id 4205 Bobby Zamora 30 QPR CF 7 5800000 3966 Djibril Cissé 30 QPR CF 6 5000000 4072 Peter Crouch 30 Stoke City CF 10 11300000 7451 Mikel Arteta 29 Arsenal CM 7 12000000 13168 Raul Meireles 28 Chelsea CM 2 13500000 money_by_goals id 4205 828571.428571 3966 833333.333333 4072 1130000.000000 7451 1714285.714286 13168 6750000.000000 Season : 12/13 p_name age to position goals transfer_fee_euros \ id 27577 Clint Dempsey 29 Spurs SS 6 7500000 4380 Robin van Persie 29 Man Utd CF 24 30700000 8883 Emmanuel Adebayor 28 Spurs CF 3 6400000 40205 Demba Ba 27 Chelsea CF 15 8500000 15185 Lukas Podolski 27 Arsenal LW 9 12000000 money_by_goals id 27577 1250000.000000 4380 1279166.666667 8883 2133333.333333 40205 566666.666667 15185 1333333.333333
# some LW and RW positions are ambiguous
#print player_df[player_df.position == 'LW'][['p_name','position']]
##position codes categorized by position type
##defence = ['SW' , 'CB' , 'LB' , 'RB']
##midfield = ['DM', 'CM', 'RM','LM', 'AM', 'LW', 'RW']
##striker = ['LW', 'RW', 'SS', 'CF']
#del player_df['pos_type']
for season in player_df.season.unique():
# defensive positions
print "\n**************************************************************************************"
print '\nSeason: '+ season + ' - The top 5 defensive players by money spent per minute on field\n'
print player_df[player_df['position'].isin(['SW' , 'CB' ,'LB', 'RB'])].groupby(['season']).sort_index(by='money_by_minutes').xs(season)[:5][['p_name' ,'to', 'money_by_minutes']]
print '\nSeason: '+ season + ' - The top 5 midfield players by money spent per minute on field\n'
print player_df[player_df['position'].isin(['DM', 'CM', 'RM','LM', 'AM']) | ( player_df['id'].isin(['7858','14086','4063','15921','50202','15921','35251'])) ].groupby(['season']).sort_index(by='money_by_minutes').xs(season)[:5][['p_name' ,'to', 'money_by_minutes']]
print '\nSeason: '+ season + ' - The top 5 Attacking players by money spent by goal scored\n'
print player_df[player_df['position'].isin([ 'SS', 'CF']) | ( player_df['id'].isin(['31552','15185'])) ].groupby(['season']).sort_index(by='money_by_goals').xs(season)[:5][['p_name' ,'to', 'money_by_goals']]
print "-----------------------------------------------------------------------------------------\n\n\n"
#print player_df['pos_type'].value_counts()
#player_df.pos_type.value_counts()
************************************************************************************** Season: 10/11 - The top 5 defensive players by money spent per minute on field p_name to money_by_minutes id 5293 Sébastien Squillaci Arsenal 3571.428571 76277 Laurent Koscielny Arsenal 4776.461597 103427 Chris Smalling Man Utd 7366.482505 26485 Jérôme Boateng Man City 10080.645161 37526 Winston Reid West Ham 10319.410319 Season: 10/11 - The top 5 midfield players by money spent per minute on field p_name to money_by_minutes id 19368 Moussa Dembélé Fulham 3121.748179 4192 Rafael van der Vaart Spurs 4502.476362 13168 Raul Meireles Liverpool 5132.254244 18871 Jean Beausejour Birmingham 5721.096544 37157 Stéphane Sessègnon Sunderland 5917.159763 Season: 10/11 - The top 5 Attacking players by money spent by goal scored p_name to money_by_goals id 50935 Chicharito Man Utd 576923.076923 35099 Steven Fletcher Wolves 720000.000000 28245 Kenwyne Jones Stoke City 1077777.777778 13239 Darren Bent Aston Villa 1264705.882353 19729 Nikola Zigic Birmingham 1400000.000000 ----------------------------------------------------------------------------------------- ************************************************************************************** Season: 11/12 - The top 5 defensive players by money spent per minute on field p_name to money_by_minutes id 35571 José Enrique Liverpool 2672.010688 62688 Scott Dann Blackburn 2830.974188 7449 Gaël Clichy Man City 3114.951768 27511 Gary Cahill Chelsea 3369.434416 42751 Roger Johnson Wolves 3394.145100 Season: 11/12 - The top 5 midfield players by money spent per minute on field p_name to money_by_minutes id 29434 Yohan Cabaye Newcastle 1772.421127 3785 Scott Parker Spurs 2513.173895 37171 Craig Gardner Sunderland 3116.147309 28990 Charlie Adam Liverpool 3874.538745 88755 Kevin de Bruyne Chelsea 4424.778761 Season: 11/12 - The top 5 Attacking players by money spent by goal scored p_name to money_by_goals id 37304 Shane Long West Brom 637500.000000 25742 Nikica Jelavic Everton 733333.333333 4205 Bobby Zamora QPR 828571.428571 3966 Djibril Cissé QPR 833333.333333 4072 Peter Crouch Stoke City 1130000.000000 ----------------------------------------------------------------------------------------- ************************************************************************************** Season: 12/13 - The top 5 defensive players by money spent per minute on field p_name to money_by_minutes id 43250 Jan Vertonghen Spurs 4629.629630 57500 César Azpilicueta Chelsea 4808.743169 143559 Matija Nastasic Man City 8786.127168 57480 Mapou Yanga-Mbiwa Newcastle 10269.576380 43003 Nacho Monreal Arsenal 14265.335235 Season: 12/13 - The top 5 midfield players by money spent per minute on field p_name to money_by_minutes id 81796 Sung-Yong Ki Swansea 3558.282209 15921 Matt Jarvis West Ham 3981.559095 30670 Vurnon Anita Newcastle 5080.693365 44065 Esteban Granero QPR 5245.901639 15799 Santi Cazorla Arsenal 6458.191706 Season: 12/13 - The top 5 Attacking players by money spent by goal scored p_name to money_by_goals id 40205 Demba Ba Chelsea 566666.666667 50201 Christian Benteke Aston Villa 586666.666667 82442 Olivier Giroud Arsenal 1090909.090909 27577 Clint Dempsey Spurs 1250000.000000 4380 Robin van Persie Man Utd 1279166.666667 -----------------------------------------------------------------------------------------
# dictionary and list for stats
stats_list=['age', 'assists', 'goals', 'matches', 'minutes', 'minutes_per_goal', 'p_name', 'position', 'transfer_fee_euros', 'transfer_fee_pounds', 'money_by_minutes', 'money_by_goal_minutes', 'money_by_assists', 'money_by_goals', 'money_by_matches']
stats_dict={'age':'Age', 'assists':'Assists', 'goals':'Goals', 'matches':'Matches', 'minutes':'Minutes', 'minutes_per_goal':'Minutes per Goal', 'p_name':'Player Name', 'position':'Position',
'transfer_fee_euros':'Transfer Fee(Euros)', 'transfer_fee_pounds':'Transfer Fee(Pounds)', 'money_by_minutes':'Money by Minutes', 'money_by_goal_minutes':'Money by Goal Minutes',
'money_by_assists':'Money by Assists', 'money_by_goals':'Money by Goals', 'money_by_matches':'Money By Matches'}
import IPython.core.display
# start off form
dialog_form= '<div id="dialog" title="Select Data"><p>'
#define teams drop down menu
g = player_df
season_drop="<label>Season:<select id='season_drop'>"
for se in g.season.unique():
season_drop+='<option value="'+se+'">'+se+'</option>'
season_drop+="</select></label></br>"
dialog_form+=season_drop
team_drop="<label>Team:<select id='team_drop'>"
for team in g.to.unique():
team_drop+='<option value="'+team+'">'+team+'</option>'
team_drop+="</select></label></br>"
dialog_form+=team_drop
#start stat dropdown
stat_drop="<label>Stat:<select id='stat_drop'>"
for stat in g.columns:
if stat in stats_list:
stat_drop+='<option value="'+stat+'">'+stats_dict[stat]+'</option>'
stat_drop+="</select></label>"
dialog_form+=stat_drop
dialog_form+="</p></div>"
IPython.core.display.HTML(dialog_form)
team=None
stat=None
season=None
def prompt_results():
if not stat or not team or not season:
return "No Data Selected, Use Prompt"
# grouped = player_df[player_df.season=='11/12'].groupby(['to'])
g = player_df[player_df.season==season]
#g = player_df[player_df.season=='11/12']
gteam= g[g.club==team]
gstat= gteam[stat]
eq=""
if len(gstat.values)<=0:
return "No data for that selection, Try another."
for s in gstat:
eq+=str(s)+", "
eq= eq[:-2]
if type(gstat[0]) is numpy.float64:
eq+=" (Total: "+str(gstat.sum())+")"
print stats_dict[stat]+"(s) by "+team+" [with/of transfered players] = "+eq
#Working Non formatted
team=None
stat=None
season=None
IPython.core.display.Javascript("""
$( "#dialog" ).dialog({ buttons: [ { text: "Ok", click: function() {
var teami,stati,seasoni,py1,py2,py3;
teami= $('#team_drop').val();
stati= $('#stat_drop').val();
seasoni= $('#season_drop').val();
py1 = "team = '"+teami+"'";
py2 = "stat = '"+stati+"'";
py3 = "season = '"+seasoni+"'";
IPython.notebook.kernel.execute(py1);
IPython.notebook.kernel.execute(py2);
IPython.notebook.kernel.execute(py3);
$( this ).dialog( "close" ); } } ] });
""")
#html formated code
team=None
stat=None
season=None
IPython.core.display.Javascript("""
$( '#dialog' ).dialog({ buttons: [ { text: 'Ok', click: function() {
var teami,stati,seasoni,py1,py2,pyr3;
teami= $('#team_drop').val();
stati= $('#stat_drop').val();
seasoni= $('#season_drop').val()
py1 = 'team = "'+teami+'"';
py2 = 'stat = "'+stati+'"';
pyr3 = 'season = "'+seasoni+'"';
IPython.notebook.kernel.execute(py1);
IPython.notebook.kernel.execute(py2);
IPython.notebook.kernel.execute(pyr3);
$( this ).dialog( 'close' ); } } ] });
""")
prompt_results()
Goals(s) by Man Utd [with/of transfered players] = 6.0, 1.0, 0.0 (Total: 7.0)