Love and Live the Game!
To reproduce the plots, you need a Plotly username and `api_key` to sign in to Plotly.
%%bash
# Print the current user name, then the current date and time.
whoami
date
Aziz
Wed Dec 2 19:10:12 EST 2015
P: Games Played
W: Games Won
D: Games Drawn
L: Games Lost
GS: Goals Scored
GA: Goals Against
Diff: Goal Difference (GS - GA)
Pts: Points
# Show the first rows of every league table, one league after another.
for league, df in leagues.items():
    print(league)
    print(df.head())
    # Blank separator line. A bare `print` only emits a newline on Python 2;
    # on Python 3 it is an expression that does nothing. `print('')` prints
    # an empty line on both.
    print('')
Premier League
        P   W  D  L  GS  GA  Diff  Pts
Team
1-MCI  14   9  2  3  30  14    16   29
2-LEI  14   8  5  1  29  21     8   29
3-MUN  14   8  4  2  20  10    10   28
4-ARS  14   8  3  3  24  12    12   27
5-TOT  14   6  7  1  24  11    13   25

Bundesliga
        P   W  D  L  GS  GA  Diff  Pts
Team
1-BAY  14  13  1  0  42   5    37   40
2-BVB  14  10  2  2  40  19    21   32
3-WOB  14   7  4  3  23  15     8   25
4-BMG  14   7  2  5  28  22     6   23
5-HER  14   7  2  5  18  17     1   23

Ligue 1
        P   W  D  L  GS  GA  Diff  Pts
Team
1-PSG  16  13  3  0  37   8    29   42
2-CAE  16   9  2  5  19  16     3   29
3-ANG  16   7  6  3  14   9     5   27
4-LYO  16   7  5  4  21  14     7   26
5-NIC  16   7  4  5  30  19    11   25

Serie A
        P   W  D  L  GS  GA  Diff  Pts
Team
1-NAP  14   9  4  1  26   9    17   31
2-INT  14   9  3  2  17   9     8   30
3-FIO  14   9  2  3  27  12    15   29
4-ROM  14   8  3  3  29  17    12   27
5-JUV  14   7  3  4  20  11     9   24

La Liga
        P   W  D  L  GS  GA  Diff  Pts
Team
1-FCB  13  11  0  2  33  12    21   33
2-ATM  13   9  2  2  18   6    12   29
3-RMA  13   8  3  2  28  11    17   27
4-CEL  13   7  3  3  24  21     3   24
5-DEP  13   5  6  2  20  13     7   21
# bubble_2d plot
# Each call converts one matplotlib figure from `figs_2d` and displays it
# through Plotly. They are kept as separate statements (one per league)
# rather than a loop, so each can live in its own notebook cell.
py.iplot_mpl(figs_2d[0])
py.iplot_mpl(figs_2d[1])
py.iplot_mpl(figs_2d[2])
py.iplot_mpl(figs_2d[3])
py.iplot_mpl(figs_2d[4])
# bubble_3d plot
# Display the Plotly 3-D bubble figure of each league held in `figs_3d`.
py.iplot(figs_3d[0])
py.iplot(figs_3d[1])
py.iplot(figs_3d[2])
py.iplot(figs_3d[3])
py.iplot(figs_3d[4])
# boxplot plot
# Display the Plotly box-plot figure of each league held in `figs_box`.
py.iplot(figs_box[0])
py.iplot(figs_box[1])
py.iplot(figs_box[2])
py.iplot(figs_box[3])
py.iplot(figs_box[4])
# density plot
# Each call converts one matplotlib density figure from `figs_kde` and
# displays it through Plotly.
py.iplot_mpl(figs_kde[0])
py.iplot_mpl(figs_kde[1])
py.iplot_mpl(figs_kde[2])
py.iplot_mpl(figs_kde[3])
py.iplot_mpl(figs_kde[4])
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
# League name -> address of the goal.com standings page for that league.
urls = dict([
    ('La Liga', 'http://www.goal.com/en/tables/primera-divisi%C3%B3n/7'),
    ('Bundesliga', 'http://www.goal.com/en/tables/bundesliga/9?ICID=SP_TN_112'),
    ('Premier League', 'http://www.goal.com/en/tables/premier-league/8?ICID=TA'),
    ('Serie A', 'http://www.goal.com/en/tables/serie-a/13?ICID=SP_TN_114'),
    ('Ligue 1', 'http://www.goal.com/en/tables/ligue-1/16?ICID=SP_TN_114'),
])
def scrape_table(url):
    """Download a league page and return its standings as lists of strings.

    Parameters
    ----------
    url : str
        Address of the league standings page.

    Returns
    -------
    list of list of str
        One inner list per ``<tr>`` found in the ``<tbody>`` of the first
        ``<table class="short">``. Each inner list holds the stripped,
        ASCII-only text of that row's ``<td>`` cells; empty cells are
        dropped.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    ValueError
        If the page has no ``<table class="short">`` with a ``<tbody>``.
    """
    # A timeout keeps a stalled server from hanging the notebook forever,
    # and raise_for_status() stops us from parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs(response.text, 'html.parser')
    table = soup.find('table', class_='short')
    standings = table.find('tbody') if table is not None else None
    if standings is None:
        raise ValueError(
            "no <table class='short'> with a <tbody> found at {}".format(url))

    teams = []
    for row in standings.find_all('tr'):
        cells = []
        for cell in row.find_all('td'):
            # Drop non-ASCII characters, then decode back to text:
            # str(bytes) would yield "b'...'" on Python 3.
            text = cell.text.strip().encode('ascii', 'ignore').decode('ascii')
            # Skip cells that are empty once stripped.
            if text:
                cells.append(text)
        teams.append(cells)
    return teams
def to_df(teams):
    """Create a DataFrame from the teams' standings lists.

    Parameters
    ----------
    teams : iterable of sequence
        One sequence per team, holding exactly 18 values in the order of
        ``cols`` below.

    Returns
    -------
    pandas.DataFrame
        One row per team, indexed 0..n-1, with the 18 standings columns.
        An empty input gives an empty frame that still has all columns.

    Raises
    ------
    ValueError
        If any team does not have exactly one value per column.
    """
    cols = ['pos', 'full_name', 'Team', 'PtsF', 'P', 'W', 'D', 'L',
            'WH', 'DH', 'LH', 'WA', 'DA', 'LA', 'GS', 'GA', 'Diff', 'Pts']
    rows = [list(team) for team in teams]
    # The DataFrame constructor silently pads short rows with missing
    # values, so check the lengths explicitly to keep the strict behaviour
    # of row-by-row assignment.
    for i, row in enumerate(rows):
        if len(row) != len(cols):
            raise ValueError(
                'team {} has {} values, expected {}'.format(
                    i, len(row), len(cols)))
    # Build the frame in one call instead of growing it one row at a time.
    return pd.DataFrame(rows, columns=cols)
def remove_cols(df):
    """Drop, in place, the standings columns that are not needed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify; it must contain every column listed in
        ``useless``.

    Returns
    -------
    None
        ``df`` itself is modified.

    Raises
    ------
    KeyError
        If any of the columns is missing. In that case ``df`` is left
        untouched.
    """
    useless = ['pos', 'full_name', 'PtsF', 'WH', 'WA', 'DH', 'DA', 'LH', 'LA']
    # A single drop() checks every label before changing anything, so a
    # missing column cannot leave the frame half-stripped the way
    # column-by-column `del` would.
    df.drop(useless, axis=1, inplace=True)
def apply_int(df):
    """Convert every column of ``df`` to integers, in place (for plotting).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns all hold integer-like values (e.g. '14').

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with every column replaced by its int64
        version.

    Raises
    ------
    ValueError
        If a value cannot be converted to an integer.
    """
    for c in df.columns:
        # Vectorised cast instead of calling int() on every element. The
        # width is pinned to int64 so later arithmetic such as Pts**5 has
        # the same headroom on every platform.
        df[c] = df[c].astype('int64')
    return df
def league_df(url):
    """Return the standings table scraped from ``url`` as a DataFrame.

    The frame is indexed by '<position>-<team>' labels (for example
    '1-MCI'), and every remaining column is converted to int.
    """
    table = to_df(scrape_table(url))

    # Prefix every team with its position in the table.
    labels = []
    for position, team in zip(table['pos'], table['Team']):
        labels.append('{}-{}'.format(position, team))
    table['Team'] = labels

    # Drop the columns that are not needed (modifies `table` in place).
    remove_cols(table)

    # Index by team label, then make every column an integer.
    indexed = table.set_index('Team')
    return apply_int(indexed)
# {league: its_table_data_frame}
leagues = {}
for league, url in urls.items():
    df = league_df(url)
    leagues[league] = df
# list(...) so the keys print as a plain list on Python 3 as well, where
# dict.keys() is a view object.
print(list(leagues.keys()))
['Premier League', 'Bundesliga', 'Ligue 1', 'Serie A', 'La Liga']
# Notebook magic: show matplotlib figures inline in the notebook.
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
# Select the 'ggplot' matplotlib style.
matplotlib.style.use('ggplot')
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
# NOTE(review): 'username' and 'api_key' look like placeholders -- replace
# them with your own Plotly credentials before running.
py.sign_in('username', 'api_key')
# ref: https://plot.ly/python/matplotlib-to-plotly-tutorial/#Bubble-Charts
def bubble_2d(df, league='Soccer League'):
    """Return a matplotlib bubble chart of goals scored against points.

    Each row of ``df`` becomes one bubble at (Pts, GS), coloured by GS,
    sized by sqrt(Pts**5), and labelled with the row's index value.
    ``league`` is used as the plot title.
    """
    mpl_fig = plt.figure()  # start a fresh figure for this league
    axes = mpl_fig.add_subplot(111)

    plt.title(league)
    plt.ylabel('Goals Scored')
    plt.xlabel('Points')

    points = df['Pts']
    goals = df['GS']
    axes.scatter(
        points,
        goals,
        c=goals,  # colour scale follows goals scored
        s=np.sqrt(points**5),
        linewidths=2,
        edgecolor='w',
        alpha=0.6,
    )

    # Write each row's index label (the team name) at its bubble centre.
    for team, row in df.iterrows():
        plt.text(row['Pts'], row['GS'], team,
                 size=8, horizontalalignment='center')

    return mpl_fig
# # Test
# fig = bubble_2d(df, league)
# py.iplot_mpl(fig)
# One 2-D bubble chart per league, in the iteration order of `leagues`.
figs_2d = [bubble_2d(table, name) for name, table in leagues.items()]
# https://plot.ly/~jorgesantos/402/cufflinks-bubble-3d-chart/
def bubble_3d(df, league='Soccer League'):
    """Return a Plotly figure with one 3-D marker trace per row of ``df``.

    Each row is placed at x=GA, y=GS, z=Pts with marker size Pts * 1.5,
    and is named after the row's index value. ``league`` is the title.
    """
    def team_trace(team, score):
        # Marker whose size follows the row's points.
        bubble = go.Marker(
            line=go.Line(width=0.5),
            size=score.Pts * 1.5,
            symbol='dot',
        )
        return go.Scatter3d(
            x=score.GA,
            y=score.GS,
            z=score.Pts,
            marker=bubble,
            opacity=0.7,
            mode='markers',
            name=team,
            text=team,
        )

    data = go.Data([team_trace(team, score) for team, score in df.iterrows()])

    scene = go.Scene(
        xaxis=go.XAxis(title='Goals Against (x)'),
        yaxis=go.YAxis(title='Goals Scored (y)'),
        zaxis=go.ZAxis(title='Points (z)'),
    )
    layout = go.Layout(scene=scene, title=league)
    return go.Figure(data=data, layout=layout)
# One 3-D bubble chart per league, in the iteration order of `leagues`.
figs_3d = [bubble_3d(table, name) for name, table in leagues.items()]
# ref: https://plot.ly/python/box-plots/
def boxplot(df, league='Soccer League'):
    """Return a Plotly figure with one box trace per column of ``df``.

    The 'P' column is left out. Each remaining column contributes a box
    built from its values and named after the column. ``league`` is the
    figure title.
    """
    # Compare by value. `is not 'P'` tested object identity, which only
    # worked when the interpreter happened to reuse one string object, and
    # it raises a SyntaxWarning on recent Python versions.
    columns = [c for c in df.columns if c != 'P']
    traces = [go.Box(y=df[c].values, name=c) for c in columns]
    data = go.Data(traces)
    layout = go.Layout(title=league)
    return go.Figure(data=data, layout=layout)
# # TEST
# fig = boxplot(df, 'La Liga')
# py.iplot(fig)
# One box-plot figure per league, in the iteration order of `leagues`.
figs_box = [boxplot(table, name) for name, table in leagues.items()]
def density(df, league='Soccer League'):
    """Return a matplotlib figure with a KDE curve per column of ``df``.

    The 'P' column is left out. ``league`` is the plot title; its default
    matches the other plotting helpers in this file.
    """
    fig, ax = plt.subplots()
    # Compare by value. `is not 'P'` tested object identity, which is not
    # guaranteed to hold for equal strings.
    cols = [c for c in df.columns if c != 'P']
    df[cols].plot(kind='kde', ax=ax, title=league)
    return fig
# # Test
# fig = density(df, league)
# py.iplot_mpl(fig)
# One density figure per league, in the iteration order of `leagues`.
figs_kde = [density(table, name) for name, table in leagues.items()]