import pandas as pd
import numpy as np
Read data from IOI homepage:
url=r'http://stats.ioinformatics.org/countries/'
dfs = pd.read_html(url)#returns a list of tables
df=dfs[0]
df.head()#inspect header
0 | 1 | 2 | 3 | 4 | 5 | 6 | |
---|---|---|---|---|---|---|---|
0 | NaN | Country▲ | IOI Host | Medals | NaN | NaN | NaN |
1 | G | S | B | Total | NaN | NaN | NaN |
2 | NaN | Albania | NaN | 0 | 0 | 0 | 0 |
3 | ? | Argentina | 1993 | 2 | 8 | 22 | 32 |
4 | ? | Armenia | NaN | 1 | 4 | 20 | 25 |
Re-read skipping rows 0,1:
url=r'http://stats.ioinformatics.org/countries/'
dfs = pd.read_html(url, skiprows=[0,1])
df=dfs[0]
df.head()
0 | 1 | 2 | 3 | 4 | 5 | 6 | |
---|---|---|---|---|---|---|---|
0 | NaN | Albania | NaN | 0 | 0 | 0 | 0 |
1 | ? | Argentina | 1993 | 2 | 8 | 22 | 32 |
2 | ? | Armenia | NaN | 1 | 4 | 20 | 25 |
3 | NaN | Australia | 2013 | 6 | 14 | 30 | 50 |
4 | NaN | Austria | NaN | 3 | 5 | 22 | 30 |
print df.columns
Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')
Delete columns 0, 2:
df.drop([0,2], axis=1, inplace=True)
print df.head()
1 3 4 5 6 0 Albania 0 0 0 0 1 Argentina 2 8 22 32 2 Armenia 1 4 20 25 3 Australia 6 14 30 50 4 Austria 3 5 22 30
dfn=df.rename(columns = {1: 'Country', 3:'Gold', 4: 'Silver', 5: 'Bronze', 6:'Total'})
dfn.head()
Country | Gold | Silver | Bronze | Total | |
---|---|---|---|---|---|
0 | Albania | 0 | 0 | 0 | 0 |
1 | Argentina | 2 | 8 | 22 | 32 |
2 | Armenia | 1 | 4 | 20 | 25 |
3 | Australia | 6 | 14 | 30 | 50 |
4 | Austria | 3 | 5 | 22 | 30 |
dfn = dfn[(dfn[['Total']] != 0).all(axis=1)]#delete countries with no medal
L= len(dfn)
dfn= dfn.set_index(np.arange(L))
dfn.head()
Country | Gold | Silver | Bronze | Total | |
---|---|---|---|---|---|
0 | Argentina | 2 | 8 | 22 | 32 |
1 | Armenia | 1 | 4 | 20 | 25 |
2 | Australia | 6 | 14 | 30 | 50 |
3 | Austria | 3 | 5 | 22 | 30 |
4 | Azerbaijan | 0 | 1 | 4 | 5 |
List the complete table:
from IPython.display import HTML
h = HTML(dfn.to_html());h
Country | Gold | Silver | Bronze | Total | |
---|---|---|---|---|---|
0 | Argentina | 2 | 8 | 22 | 32 |
1 | Armenia | 1 | 4 | 20 | 25 |
2 | Australia | 6 | 14 | 30 | 50 |
3 | Austria | 3 | 5 | 22 | 30 |
4 | Azerbaijan | 0 | 1 | 4 | 5 |
5 | Bangladesh | 0 | 1 | 3 | 4 |
6 | Belarus | 14 | 32 | 33 | 79 |
7 | Belgium | 0 | 2 | 6 | 8 |
8 | Bosnia and Herzegovina | 0 | 1 | 5 | 6 |
9 | Brazil | 2 | 9 | 27 | 38 |
10 | Bulgaria | 23 | 39 | 31 | 93 |
11 | Canada | 10 | 22 | 33 | 65 |
12 | Chile | 0 | 0 | 1 | 1 |
13 | China | 72 | 23 | 12 | 107 |
14 | Colombia | 0 | 2 | 9 | 11 |
15 | Croatia | 11 | 33 | 32 | 76 |
16 | Cuba | 0 | 6 | 26 | 32 |
17 | Cyprus | 0 | 0 | 2 | 2 |
18 | Czech Republic | 13 | 22 | 36 | 71 |
19 | Czechoslovakia | 3 | 4 | 2 | 9 |
20 | Denmark | 3 | 6 | 19 | 28 |
21 | Egypt | 0 | 4 | 12 | 16 |
22 | Estonia | 5 | 17 | 30 | 52 |
23 | Finland | 5 | 21 | 33 | 59 |
24 | France | 2 | 7 | 27 | 36 |
25 | Georgia | 2 | 7 | 31 | 40 |
26 | German Democratic Republic | 0 | 3 | 0 | 3 |
27 | Germany | 14 | 26 | 36 | 76 |
28 | Greece | 0 | 4 | 21 | 25 |
29 | Hong Kong | 4 | 21 | 38 | 63 |
30 | Hungary | 11 | 27 | 41 | 79 |
31 | India | 1 | 11 | 27 | 39 |
32 | Indonesia | 2 | 18 | 29 | 49 |
33 | Iran | 19 | 47 | 22 | 88 |
34 | Ireland | 1 | 4 | 12 | 17 |
35 | Israel | 5 | 22 | 25 | 52 |
36 | Italy | 2 | 14 | 24 | 40 |
37 | Japan | 16 | 17 | 8 | 41 |
38 | Kazakhstan | 2 | 14 | 24 | 40 |
39 | Kyrgyzstan | 0 | 0 | 3 | 3 |
40 | Latvia | 6 | 23 | 39 | 68 |
41 | Lithuania | 2 | 25 | 39 | 66 |
42 | Luxembourg | 0 | 1 | 14 | 15 |
43 | Macau | 0 | 4 | 10 | 14 |
44 | Macedonia | 0 | 1 | 13 | 14 |
45 | Malaysia | 0 | 2 | 3 | 5 |
46 | Malta | 0 | 0 | 2 | 2 |
47 | Mexico | 1 | 3 | 13 | 17 |
48 | Moldova | 0 | 2 | 22 | 24 |
49 | Mongolia | 0 | 0 | 2 | 2 |
50 | Netherlands | 3 | 15 | 36 | 54 |
51 | New Zealand | 0 | 1 | 14 | 15 |
52 | Norway | 0 | 2 | 5 | 7 |
53 | Philippines | 0 | 0 | 1 | 1 |
54 | Poland | 34 | 36 | 27 | 97 |
55 | Portugal | 0 | 0 | 5 | 5 |
56 | Republic of Korea | 33 | 33 | 26 | 92 |
57 | Romania | 28 | 42 | 25 | 95 |
58 | Russia | 52 | 32 | 12 | 96 |
59 | Serbia | 0 | 11 | 14 | 25 |
60 | Serbia and Montenegro | 2 | 15 | 17 | 34 |
61 | Singapore | 8 | 27 | 34 | 69 |
62 | Slovakia | 24 | 37 | 23 | 84 |
63 | Slovenia | 0 | 4 | 22 | 26 |
64 | South Africa | 4 | 13 | 34 | 51 |
65 | Soviet Union | 3 | 4 | 1 | 8 |
66 | Spain | 0 | 4 | 12 | 16 |
67 | Sri Lanka | 3 | 7 | 16 | 26 |
68 | Sweden | 13 | 22 | 27 | 62 |
69 | Switzerland | 0 | 7 | 25 | 32 |
70 | Syria | 0 | 2 | 3 | 5 |
71 | Taiwan | 17 | 42 | 22 | 81 |
72 | Tajikistan | 0 | 0 | 4 | 4 |
73 | Thailand | 15 | 32 | 41 | 88 |
74 | Trinidad and Tobago | 0 | 0 | 3 | 3 |
75 | Tunisia | 0 | 0 | 1 | 1 |
76 | Turkey | 2 | 20 | 37 | 59 |
77 | Turkmenistan | 1 | 0 | 4 | 5 |
78 | Ukraine | 7 | 22 | 44 | 73 |
79 | United Kingdom | 5 | 16 | 29 | 50 |
80 | United States of America | 42 | 33 | 15 | 90 |
81 | Venezuela | 0 | 0 | 1 | 1 |
82 | Vietnam | 10 | 36 | 45 | 91 |
83 | Yugoslavia | 1 | 3 | 1 | 5 |
Shorten a few country names:
dfn.loc[8, 'Country']='BosniaHer'
dfn.loc[18,'Country']='CzechRep'
dfn.loc[19, 'Country']='Czechoslov'
dfn.loc[26, 'Country']='GermanDemR'
dfn.loc[29,'Country']='HongKong'
dfn.loc[51,'Country']='NewZealand'
dfn.loc[56, 'Country']='RepKorea'
dfn.loc[60, 'Country']='SerbiaMoN'
dfn.loc[64, 'Country']='SouthAfrica'
dfn.loc[65, 'Country']='SovUnion'
dfn.loc[67, 'Country']='SriLanka'
dfn.loc[74, 'Country']='TrinidadTo'
dfn.loc[79, 'Country']='UnKingdom'
dfn.loc[80, 'Country']='USA'
#Check data again
#h = HTML(dfn.to_html());h
Convert data from the last 4 columns to int:
for c in ['Gold', 'Silver', 'Bronze', 'Total']:
dfn[c]=dfn[c].astype(int)
Sort data increasingly with respect to the total number of medals:
dfs=dfn.sort(columns='Total')
L=len(dfs)
dfs= dfs.set_index(np.arange(L))
dfs.head()
Country | Gold | Silver | Bronze | Total | |
---|---|---|---|---|---|
0 | Philippines | 0 | 0 | 1 | 1 |
1 | Venezuela | 0 | 0 | 1 | 1 |
2 | Tunisia | 0 | 0 | 1 | 1 |
3 | Chile | 0 | 0 | 1 | 1 |
4 | Mongolia | 0 | 0 | 2 | 2 |
Set colors for Gold, Silver and Bronze medal:
colors=['#D9D919', '#C0C0C0', '#A67D3D'] #bright gold, silver, bronze
import plotly.plotly as py
from plotly.graph_objs import *
py.sign_in("empet", "shzbvu3hbh")
Co=dfs['Country'].values.tolist()
Go=dfs['Gold'].values.tolist()
Si=dfs['Silver'].values.tolist()
Bro=dfs['Bronze'].values.tolist()
To=dfs['Total'].values.tolist()
print max(To)
107
anno_text="Source: <a href='http://stats.ioinformatics.org/countries/'>[1]</a>"
trace1= Bar(
x=Go,
y=Co,
orientation='h',
marker=Marker(color=colors[0]),
name='gold'
)
trace2= Bar(
x=Si,
y=Co,
orientation='h',
marker=Marker(color=colors[1]),
name='silver'
)
trace3= Bar(
x=Bro,
y=Co,
orientation='h',
marker=Marker(color=colors[2]),
name='bronze'
)
data = Data([trace1, trace2, trace3])
m_title='Unofficial country rankings according to the total number of medals at IOI <br> 1989-2015'
layout = Layout(
title=m_title,
font=Font(family='Raleway, sans-serif'
),
showlegend=False,
autosize=False,
width=800,
height=1000,
xaxis=XAxis(
range=[0, 108],
gridwidth=2,
showgrid=True,
zeroline=True,
showline=True,
mirror=True,
side='top'
),
yaxis=YAxis(
#title='Country',
showgrid=False,
zeroline=True,
showline=True,
autotick=True,
nticks=0,
ticks='outside',
showticklabels=True,
tick0=0,
dtick=1,
ticklen=10,
mirror=True
),
margin=Margin(
l=80,
r=80,
b=80,
t=100,
pad=2
),
annotations=Annotations([
Annotation(
showarrow=False,
text=anno_text,
xref='paper',
yref='paper',
x=0,
y=-0.075,
xanchor='left',
yanchor='bottom',
font=Font(
size=14
)
)
]),
bargap=0.18,
bargroupgap=0,
hovermode='y',
dragmode='zoom',
barmode='stack',
)
fig = Figure(data=data, layout=layout)
py.iplot(fig, filename='IOI-Rankings', height=1005)
from IPython.core.display import HTML
def css_styling():
styles = open("./custom.css", "r").read()
return HTML(styles)
css_styling()