In [43]:
import pandas as pd

cric = pd.read_html('http://stats.espncricinfo.com/ci/engine/player/348144.html?class=3;template=results;type=batting;view=innings', match='innings', na_values='-')
df = cric[0]
In [44]:
df.head(10)
Out[44]:
Runs Mins BF 4s 6s SR Pos Dismissal Inns Unnamed: 9 Opposition Ground Start Date Unnamed: 13
0 15* 13.0 11 2 0 136.36 3 not out 2 NaN v England Manchester 7 Sep 2016 T20I # 566
1 55* 49.0 37 6 2 148.64 3 not out 2 NaN v West Indies Dubai (DSC) 23 Sep 2016 T20I # 568
2 19 28.0 18 2 0 105.55 3 caught 1 NaN v West Indies Dubai (DSC) 24 Sep 2016 T20I # 569
3 27* 42.0 24 1 0 112.50 3 not out 2 NaN v West Indies Abu Dhabi 27 Sep 2016 T20I # 570
4 29 NaN 30 3 0 96.66 3 caught 2 NaN v West Indies Bridgetown 26 Mar 2017 T20I # 602
5 27 NaN 28 4 0 96.42 3 caught 1 NaN v West Indies Port of Spain 30 Mar 2017 T20I # 603
6 43 NaN 38 3 1 113.15 4 bowled 1 NaN v West Indies Port of Spain 1 Apr 2017 T20I # 604
7 38 NaN 36 1 1 105.55 3 caught 2 NaN v West Indies Port of Spain 2 Apr 2017 T20I # 605
8 86 NaN 52 10 2 165.38 3 caught 1 NaN v World-XI Lahore 12 Sep 2017 T20I # 619
9 45 NaN 38 5 0 118.42 3 caught 1 NaN v World-XI Lahore 13 Sep 2017 T20I # 620
In [45]:
# Clean up example,eg remove *
df['Start Date'] = pd.to_datetime(df['Start Date'])
df['Runs'] = df['Runs'].str.extract('(\d+)').astype(int)
In [46]:
df.dtypes
Out[46]:
Runs                    int32
Mins                  float64
BF                      int64
4s                      int64
6s                      int64
SR                    float64
Pos                     int64
Dismissal              object
Inns                    int64
Unnamed: 9            float64
Opposition             object
Ground                 object
Start Date     datetime64[ns]
Unnamed: 13            object
dtype: object
In [47]:
# Examle take out columns
col_choice = df.loc[:, 'Runs':'6s'].head(10)
#col_choice.to_csv('out.csv')
In [48]:
col_choice.head(10)
Out[48]:
Runs Mins BF 4s 6s
0 15 13.0 11 2 0
1 55 49.0 37 6 2
2 19 28.0 18 2 0
3 27 42.0 24 1 0
4 29 NaN 30 3 0
5 27 NaN 28 4 0
6 43 NaN 38 3 1
7 38 NaN 36 1 1
8 86 NaN 52 10 2
9 45 NaN 38 5 0
In [49]:
# If want to fill NaN with 0
df.fillna(0).head(10)
Out[49]:
Runs Mins BF 4s 6s SR Pos Dismissal Inns Unnamed: 9 Opposition Ground Start Date Unnamed: 13
0 15 13.0 11 2 0 136.36 3 not out 2 0.0 v England Manchester 2016-09-07 T20I # 566
1 55 49.0 37 6 2 148.64 3 not out 2 0.0 v West Indies Dubai (DSC) 2016-09-23 T20I # 568
2 19 28.0 18 2 0 105.55 3 caught 1 0.0 v West Indies Dubai (DSC) 2016-09-24 T20I # 569
3 27 42.0 24 1 0 112.50 3 not out 2 0.0 v West Indies Abu Dhabi 2016-09-27 T20I # 570
4 29 0.0 30 3 0 96.66 3 caught 2 0.0 v West Indies Bridgetown 2017-03-26 T20I # 602
5 27 0.0 28 4 0 96.42 3 caught 1 0.0 v West Indies Port of Spain 2017-03-30 T20I # 603
6 43 0.0 38 3 1 113.15 4 bowled 1 0.0 v West Indies Port of Spain 2017-04-01 T20I # 604
7 38 0.0 36 1 1 105.55 3 caught 2 0.0 v West Indies Port of Spain 2017-04-02 T20I # 605
8 86 0.0 52 10 2 165.38 3 caught 1 0.0 v World-XI Lahore 2017-09-12 T20I # 619
9 45 0.0 38 5 0 118.42 3 caught 1 0.0 v World-XI Lahore 2017-09-13 T20I # 620
In [50]:
df.plot(kind='bar', x='Runs', y='BF')
Out[50]:
<matplotlib.axes._subplots.AxesSubplot at 0x1fec1cbbf98>
In [ ]:
 
In [ ]: