import pandas as pd
cric = pd.read_html('http://stats.espncricinfo.com/ci/engine/player/348144.html?class=3;template=results;type=batting;view=innings', match='innings', na_values='-')
df = cric[0]
df.head(10)
Runs | Mins | BF | 4s | 6s | SR | Pos | Dismissal | Inns | Unnamed: 9 | Opposition | Ground | Start Date | Unnamed: 13 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 15* | 13.0 | 11 | 2 | 0 | 136.36 | 3 | not out | 2 | NaN | v England | Manchester | 7 Sep 2016 | T20I # 566 |
1 | 55* | 49.0 | 37 | 6 | 2 | 148.64 | 3 | not out | 2 | NaN | v West Indies | Dubai (DSC) | 23 Sep 2016 | T20I # 568 |
2 | 19 | 28.0 | 18 | 2 | 0 | 105.55 | 3 | caught | 1 | NaN | v West Indies | Dubai (DSC) | 24 Sep 2016 | T20I # 569 |
3 | 27* | 42.0 | 24 | 1 | 0 | 112.50 | 3 | not out | 2 | NaN | v West Indies | Abu Dhabi | 27 Sep 2016 | T20I # 570 |
4 | 29 | NaN | 30 | 3 | 0 | 96.66 | 3 | caught | 2 | NaN | v West Indies | Bridgetown | 26 Mar 2017 | T20I # 602 |
5 | 27 | NaN | 28 | 4 | 0 | 96.42 | 3 | caught | 1 | NaN | v West Indies | Port of Spain | 30 Mar 2017 | T20I # 603 |
6 | 43 | NaN | 38 | 3 | 1 | 113.15 | 4 | bowled | 1 | NaN | v West Indies | Port of Spain | 1 Apr 2017 | T20I # 604 |
7 | 38 | NaN | 36 | 1 | 1 | 105.55 | 3 | caught | 2 | NaN | v West Indies | Port of Spain | 2 Apr 2017 | T20I # 605 |
8 | 86 | NaN | 52 | 10 | 2 | 165.38 | 3 | caught | 1 | NaN | v World-XI | Lahore | 12 Sep 2017 | T20I # 619 |
9 | 45 | NaN | 38 | 5 | 0 | 118.42 | 3 | caught | 1 | NaN | v World-XI | Lahore | 13 Sep 2017 | T20I # 620 |
# Clean up example,eg remove *
df['Start Date'] = pd.to_datetime(df['Start Date'])
df['Runs'] = df['Runs'].str.extract('(\d+)').astype(int)
df.dtypes
Runs int32 Mins float64 BF int64 4s int64 6s int64 SR float64 Pos int64 Dismissal object Inns int64 Unnamed: 9 float64 Opposition object Ground object Start Date datetime64[ns] Unnamed: 13 object dtype: object
# Examle take out columns
col_choice = df.loc[:, 'Runs':'6s'].head(10)
#col_choice.to_csv('out.csv')
col_choice.head(10)
Runs | Mins | BF | 4s | 6s | |
---|---|---|---|---|---|
0 | 15 | 13.0 | 11 | 2 | 0 |
1 | 55 | 49.0 | 37 | 6 | 2 |
2 | 19 | 28.0 | 18 | 2 | 0 |
3 | 27 | 42.0 | 24 | 1 | 0 |
4 | 29 | NaN | 30 | 3 | 0 |
5 | 27 | NaN | 28 | 4 | 0 |
6 | 43 | NaN | 38 | 3 | 1 |
7 | 38 | NaN | 36 | 1 | 1 |
8 | 86 | NaN | 52 | 10 | 2 |
9 | 45 | NaN | 38 | 5 | 0 |
# If want to fill NaN with 0
df.fillna(0).head(10)
Runs | Mins | BF | 4s | 6s | SR | Pos | Dismissal | Inns | Unnamed: 9 | Opposition | Ground | Start Date | Unnamed: 13 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 15 | 13.0 | 11 | 2 | 0 | 136.36 | 3 | not out | 2 | 0.0 | v England | Manchester | 2016-09-07 | T20I # 566 |
1 | 55 | 49.0 | 37 | 6 | 2 | 148.64 | 3 | not out | 2 | 0.0 | v West Indies | Dubai (DSC) | 2016-09-23 | T20I # 568 |
2 | 19 | 28.0 | 18 | 2 | 0 | 105.55 | 3 | caught | 1 | 0.0 | v West Indies | Dubai (DSC) | 2016-09-24 | T20I # 569 |
3 | 27 | 42.0 | 24 | 1 | 0 | 112.50 | 3 | not out | 2 | 0.0 | v West Indies | Abu Dhabi | 2016-09-27 | T20I # 570 |
4 | 29 | 0.0 | 30 | 3 | 0 | 96.66 | 3 | caught | 2 | 0.0 | v West Indies | Bridgetown | 2017-03-26 | T20I # 602 |
5 | 27 | 0.0 | 28 | 4 | 0 | 96.42 | 3 | caught | 1 | 0.0 | v West Indies | Port of Spain | 2017-03-30 | T20I # 603 |
6 | 43 | 0.0 | 38 | 3 | 1 | 113.15 | 4 | bowled | 1 | 0.0 | v West Indies | Port of Spain | 2017-04-01 | T20I # 604 |
7 | 38 | 0.0 | 36 | 1 | 1 | 105.55 | 3 | caught | 2 | 0.0 | v West Indies | Port of Spain | 2017-04-02 | T20I # 605 |
8 | 86 | 0.0 | 52 | 10 | 2 | 165.38 | 3 | caught | 1 | 0.0 | v World-XI | Lahore | 2017-09-12 | T20I # 619 |
9 | 45 | 0.0 | 38 | 5 | 0 | 118.42 | 3 | caught | 1 | 0.0 | v World-XI | Lahore | 2017-09-13 | T20I # 620 |
df.plot(kind='bar', x='Runs', y='BF')
<matplotlib.axes._subplots.AxesSubplot at 0x1fec1cbbf98>