import pandas as pd
# read_html returns a list of every table on the page whose text matches
# 'Seasons'; keep only the first match.
df = pd.read_html(
    'https://en.wikipedia.org/wiki/Premier_League_records_and_statistics',
    match='Seasons',
)[0]
# Preview the first five rows.
df.head()
Pos. | Club | Seasons | Pld | Win | Draw | Loss | GF | GA | GD | Pts | 1st | 2nd | 3rd | 4th | Relegated | BestPos. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Manchester United | 27 | 1038 | 648 | 224 | 166 | 1989 | 929 | 1060 | 2168 | 13.0 | 6.0 | 3.0 | 1.0 | NaN | 1 |
1 | 2 | Arsenal | 27 | 1038 | 565 | 260 | 213 | 1845 | 1013 | 832 | 1955 | 3.0 | 6.0 | 5.0 | 7.0 | NaN | 1 |
2 | 3 | Chelsea | 27 | 1038 | 558 | 257 | 223 | 1770 | 1002 | 768 | 1931 | 5.0 | 4.0 | 5.0 | 2.0 | NaN | 1 |
3 | 4 | Liverpool | 27 | 1038 | 529 | 262 | 247 | 1774 | 1046 | 728 | 1849 | NaN | 4.0 | 5.0 | 7.0 | NaN | 2 |
4 | 5 | Tottenham Hotspur | 27 | 1038 | 446 | 257 | 335 | 1547 | 1306 | 241 | 1595 | NaN | 1.0 | 2.0 | 3.0 | NaN | 2 |
# Inspect the column dtypes. Most are numeric already, but GD and Pts come back
# as object and may need cleaning before that data can be used.
df.dtypes
Pos. int64 Club object Seasons int64 Pld int64 Win int64 Draw int64 Loss int64 GF int64 GA int64 GD object Pts object 1st float64 2nd float64 3rd float64 4th float64 Relegated float64 BestPos. int64 dtype: object
# Top five clubs: keep the rows whose 'Pos.' value is 5 or lower.
df.loc[df['Pos.'].le(5)]
Pos. | Club | Seasons | Pld | Win | Draw | Loss | GF | GA | GD | Pts | 1st | 2nd | 3rd | 4th | Relegated | BestPos. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Manchester United | 27 | 1038 | 648 | 224 | 166 | 1989 | 929 | 1060 | 2168 | 13.0 | 6.0 | 3.0 | 1.0 | NaN | 1 |
1 | 2 | Arsenal | 27 | 1038 | 565 | 260 | 213 | 1845 | 1013 | 832 | 1955 | 3.0 | 6.0 | 5.0 | 7.0 | NaN | 1 |
2 | 3 | Chelsea | 27 | 1038 | 558 | 257 | 223 | 1770 | 1002 | 768 | 1931 | 5.0 | 4.0 | 5.0 | 2.0 | NaN | 1 |
3 | 4 | Liverpool | 27 | 1038 | 529 | 262 | 247 | 1774 | 1046 | 728 | 1849 | NaN | 4.0 | 5.0 | 7.0 | NaN | 2 |
4 | 5 | Tottenham Hotspur | 27 | 1038 | 446 | 257 | 335 | 1547 | 1306 | 241 | 1595 | NaN | 1.0 | 2.0 | 3.0 | NaN | 2 |
# Only the club whose 'Pos.' value is exactly 5.
df.loc[df['Pos.'].eq(5)]
Pos. | Club | Seasons | Pld | Win | Draw | Loss | GF | GA | GD | Pts | 1st | 2nd | 3rd | 4th | Relegated | BestPos. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
4 | 5 | Tottenham Hotspur | 27 | 1038 | 446 | 257 | 335 | 1547 | 1306 | 241 | 1595 | NaN | 1.0 | 2.0 | 3.0 | NaN | 2 |
# Clubs with at least 3 first-place finishes ('1st' >= 3) and more than
# 560 wins ('Win' > 560). Rows where '1st' is NaN never satisfy the test.
df.loc[df['1st'].ge(3) & df['Win'].gt(560)]
Pos. | Club | Seasons | Pld | Win | Draw | Loss | GF | GA | GD | Pts | 1st | 2nd | 3rd | 4th | Relegated | BestPos. | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Manchester United | 27 | 1038 | 648 | 224 | 166 | 1989 | 929 | 1060 | 2168 | 13.0 | 6.0 | 3.0 | 1.0 | NaN | 1 |
1 | 2 | Arsenal | 27 | 1038 | 565 | 260 | 213 | 1845 | 1013 | 832 | 1955 | 3.0 | 6.0 | 5.0 | 7.0 | NaN | 1 |