from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
#from tabulate import tabulate
# Download the archived schedule page. The timeout keeps the script from
# hanging forever on an unresponsive server, and raise_for_status() stops an
# HTTP error page from being parsed as if it were the schedule.
res = requests.get(
    "http://web.archive.org/web/20070701133815/http://www.bbmf.co.uk/september07.html",
    timeout=30,
)
res.raise_for_status()
soup = BeautifulSoup(res.content, 'lxml')
# Only the first <table> element of the page is handed to pandas.
table = soup.find_all('table')[0]
# read_html returns a list with one DataFrame per table it finds in the
# markup. The markup is wrapped in StringIO because passing literal HTML as a
# plain string is deprecated in recent pandas releases.
df = pd.read_html(StringIO(str(table)))
#print( tabulate(df[0], headers='keys', tablefmt='psql') )
# Keep the second DataFrame of the list (presumably a table nested inside the
# first one -- verify against the page if its layout changes).
df = df[1]
df.head()
0 | 1 | 2 | 3 | 4 | 5 | |
---|---|---|---|---|---|---|
0 | Date | Location | Lancaster | Spitfire | Hurricane | Dakota |
1 | September | September | September | September | September | September |
2 | 1 | Fort Nelson | NaN | S | NaN | NaN |
3 | NaN | Lydd - Display | L | S | H | NaN |
4 | NaN | Shackerstone - Display | NaN | S | NaN | NaN |
# Use the values of the first row as column labels: build an explicit
# {current label -> first-row value} mapping and rename with it. The first
# row itself stays in the frame.
header_row = df.iloc[0]
column_labels = header_row.to_dict()
df = df.rename(columns=column_labels)
df.head()
Date | Location | Lancaster | Spitfire | Hurricane | Dakota | |
---|---|---|---|---|---|---|
0 | Date | Location | Lancaster | Spitfire | Hurricane | Dakota |
1 | September | September | September | September | September | September |
2 | 1 | Fort Nelson | NaN | S | NaN | NaN |
3 | NaN | Lydd - Display | L | S | H | NaN |
4 | NaN | Shackerstone - Display | NaN | S | NaN | NaN |
# Keep only rows that have a location and where the Spitfire column is 'S'.
# Missing cells are NaN rather than "", and NaN != "" evaluates to True, so
# the empty-string comparison alone never excludes a missing location; the
# explicit notna() check does.
has_location = df['Location'].notna() & (df['Location'] != "")
has_spitfire = df['Spitfire'] == 'S'
df = df[has_location & has_spitfire]
# Drop every column that still contains a missing value. Because rows with a
# missing location were removed above, 'Location' cannot be dropped here.
df = df.dropna(axis='columns')
df
Location | Spitfire | |
---|---|---|
2 | Fort Nelson | S |
3 | Lydd - Display | S |
4 | Shackerstone - Display | S |
5 | Rye Meadows | S |
6 | Alfrick | S |
7 | Lydd - Display | S |
8 | Chart Sutton | S |
9 | Gedling | S |
13 | Darley | S |
14 | RAF Wittering | S |
15 | RAF Honington | S |
16 | RAF Leeming | S |
17 | Penistone | S |
18 | Menwith Hill | S |
19 | Leuchars Charity Ball - Display | S |
24 | Duxford - Display | S |
25 | Faldingworth | S |
26 | Donington Park | S |
27 | Cliveden | S |
28 | Snetterton - Display | S |
29 | Duxford - Display | S |
31 | Cliveden | S |
32 | Kemble - Display | S |
33 | Donington Park | S |
34 | Morecambe - Display | S |
37 | King Lynn | S |
38 | Norwich County Hall | S |
39 | Norwich City Hall | S |
40 | Odiham | S |
41 | Shawbury | S |
... | ... | ... |
47 | RAF Halton | S |
48 | RAF Wittering | S |
49 | RAF Coningsby | S |
50 | RAF Leeming | S |
51 | RAF Kirkton in Lindsey | S |
52 | Middle Wallop - Display | S |
53 | Shoreham - Display | S |
56 | Chichester | S |
57 | Boston | S |
58 | Sheringham | S |
59 | Costessey | S |
61 | Bentley Priory - Display | S |
62 | Little Casterton | S |
63 | Collingham | S |
66 | Shoreham - Display | S |
67 | Newhaven Fort | S |
68 | Staplehurst | S |
69 | Weald of Kent | S |
72 | Long Sutton | S |
73 | Sheringham | S |
74 | Costessey | S |
75 | Norwich Cathedral | S |
76 | Boston | S |
77 | Shrivenham | S |
80 | RAF Scampton | S |
81 | RAF Digby | S |
86 | Hanley Stoke on Trent | S |
87 | Southport - Display | S |
88 | Southport - Display | S |
89 | Flixton | S |
65 rows × 2 columns
# Show the rows whose location is exactly 'Boston' and whose Spitfire
# column is 'S'.
is_boston = df['Location'].eq('Boston')
is_spitfire = df['Spitfire'].eq('S')
df[is_boston & is_spitfire]
Location | Spitfire | |
---|---|---|
57 | Boston | S |
76 | Boston | S |