import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Load the Wikipedia list of Hungarian airports and keep the first block of
# rows, indexed by IATA code.
url = 'http://en.wikipedia.org/wiki/List_of_airports_in_Hungary'
airport_tables = pd.read_html(url)
# Only the first table on the page is used, restricted to rows labelled 0..6.
leading_rows = airport_tables[0].loc[:6]
# Transpose / set_index(0) / transpose back promotes the row labelled 0 to
# the column headers.
with_header = leading_rows.T.set_index(0).T
# Rows from label 2 onwards are kept (the row labelled 1 is skipped).
df = with_header.loc[2:].set_index('IATA')
df
Location served | County | ICAO | Airport name | Elev. | Runways | |
---|---|---|---|---|---|---|
IATA | ||||||
BUD | Budapest | (Capital) | LHBP | Budapest Ferenc Liszt International Airport | 151 m (495 ft) | 3010 m x 59 m 3707 x 59 m |
DEB | Debrecen | Hajdú-Bihar | LHDC | Debrecen International Airport | 109 m (359 ft) | 2498 m x 40 m |
SOB | Sármellék | Zala | LHSM | Hévíz-Balaton Airport | 124 m (408 ft) | 2500 x 60 m |
QGY | Győr-Pér | Győr-Moson-Sopron | LHPR | Győr-Pér International Airport | 129 m (424 ft) | 2030 x 30 m 1134 x 43 m |
QPJ | Pécs-Pogány | Baranya | LHPP | Pécs-Pogány International Airport | 305 m (1000 ft) | 1500 x 30 m |
from pygeocoder import Geocoder
apik='AIzaSyDybC2OroTE_XDJTuxjKruxFpby5VDhEGk'
locations={}
for i in df.index:
results = Geocoder(apik).geocode(i+' airport Hungary')
locations[i]=results[0].coordinates
print i
BUD DEB SOB QGY QPJ
# Persist the geocoded airports, then rebind `locations` to the decoded copy
# of what was written. Context managers make sure both handles are closed
# (and the write is flushed) before the file is read back.
with open("locations_hu.json", 'w') as locations_file:
    json.dump(locations, locations_file)
with open('locations_hu.json', 'r') as locations_file:
    locations = json.load(locations_file)
import requests
i
u'QPJ'
airportialinks={}
for i in locations:
print i,
if i=='QPJ': url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+'PEV'+'+airport+hungary'
else: url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+str(i)+'+airport+hungary'
m=requests.get(url).content
z=pd.read_html(m)[5][0][0]
z=z[z.find('http'):]
airportialinks[i]=z
print z
QPJ https://www.airportia.com/hungary/pécs_pogány-airport/map/ DEB https://www.airportia.com/hungary/debrecen-international-airport SOB https://www.airportia.com/hungary/sármellék...airport/arrivals BUD https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport QGY https://www.airportia.com/hungary/győr_pér...airport/photos
#reformat
for z in airportialinks:
airportialinks[z]=airportialinks[z].split('arrivals')[0].split('departures')[0].replace(' ','').replace('...','-international-')
if airportialinks[z][-1]!='/':airportialinks[z]+='/'
#manual fixes
if z=='QGY':airportialinks[z]=u'https://www.airportia.com/hungary/győr_pér-international-airport/'
print airportialinks[z]
https://www.airportia.com/hungary/pécs_pogány-airport/map/ https://www.airportia.com/hungary/debrecen-international-airport/ https://www.airportia.com/hungary/sármellék-international-airport/ https://www.airportia.com/hungary/győr_pér-international-airport/ https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/
# sch[airport code][departures-page URL] -> first table parsed from that page
sch={}
Record schedules for 2 weeks, then augment the counts with weekly flight numbers. Seasonal and seasonal charter flights count as once per week for 3 months, i.e. 12/52 flights per week. TGM is handled separately, since its history is in the past.
for i in locations:
print i
if i not in sch:sch[i]={}
#march 11-24 = 2 weeks
for d in range (11,25):
if d not in sch[i]:
try:
url=airportialinks[i]
full=url+'departures/201703'+str(d)
m=requests.get(full).content
sch[i][full]=pd.read_html(m)[0]
#print full
except: pass #print 'no tables',i,d
QPJ DEB SOB BUD QGY
for i in range(11,25):
testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/departures/201703'+str(i)
print 'nr. of flights on March',i,':',len(sch['BUD'][testurl])
testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/departures/20170318'
k=sch['BUD'][testurl]
k[k['To']=='Frankfurt FRA']
nr. of flights on March 11 : 89 nr. of flights on March 12 : 115 nr. of flights on March 13 : 122 nr. of flights on March 14 : 101 nr. of flights on March 15 : 108 nr. of flights on March 16 : 107 nr. of flights on March 17 : 123 nr. of flights on March 18 : 87 nr. of flights on March 19 : 118 nr. of flights on March 20 : 124 nr. of flights on March 21 : 103 nr. of flights on March 22 : 110 nr. of flights on March 23 : 109 nr. of flights on March 24 : 124
Flight | To | Airline | Scheduled | Departure | Status | Unnamed: 6 | |
---|---|---|---|---|---|---|---|
12 | LH1343 | Frankfurt FRA | Lufthansa | 06:30 | NaN | Scheduled | Track > |
30 | LH1335 | Frankfurt FRA | Lufthansa | 10:45 | 10:50 | Landed | Track > |
57 | LH1339 | Frankfurt FRA | Lufthansa | 14:35 | NaN | Scheduled | Track > |
79 | LH1341 | Frankfurt FRA | Lufthansa | 18:45 | 18:57 | Landed | Track > |
sch
This checks out with the source.
# Flatten all scraped tables into one long frame, one row per departure.
# Frames are collected first and concatenated once; concatenating inside the
# loop re-copies everything accumulated so far on every iteration.
frames = []
for i in sch:
    for d in sch[i]:
        table = sch[i][d]
        # Keep only the 2nd and 3rd columns of the scraped table.
        df = table.drop(table.columns[3:], axis=1).drop(table.columns[0], axis=1)
        df['From'] = i   # origin airport code
        df['Date'] = d   # URL of the page the row came from
        frames.append(df)
mdf = pd.concat(frames) if frames else pd.DataFrame()

mdf = mdf.replace('Hahn', 'Frankfurt')
mdf = mdf.replace('Hahn HHN', 'Frankfurt HHN')

# 'To' looks like '<city> <code>': split on the LAST space. rpartition keeps
# a value without any space intact (rfind-based slicing chopped off its last
# character for the city).
mdf['City'] = [dest.rpartition(' ')[0] or dest for dest in mdf['To']]
mdf['Airport'] = [dest.rpartition(' ')[2] for dest in mdf['To']]
# Spot check: rows taken from the page in `testurl` that fly to Frankfurt FRA.
k = mdf.loc[mdf['Date'].eq(testurl)]
k.loc[k['To'].eq('Frankfurt FRA')]
To | Airline | From | Date | City | Airport | |
---|---|---|---|---|---|---|
12 | Frankfurt FRA | Lufthansa | BUD | https://www.airportia.com/hungary/budapest-lis... | Frankfurt | FRA |
30 | Frankfurt FRA | Lufthansa | BUD | https://www.airportia.com/hungary/budapest-lis... | Frankfurt | FRA |
57 | Frankfurt FRA | Lufthansa | BUD | https://www.airportia.com/hungary/budapest-lis... | Frankfurt | FRA |
79 | Frankfurt FRA | Lufthansa | BUD | https://www.airportia.com/hungary/budapest-lis... | Frankfurt | FRA |
mdf
This checks out with the source.
# Save the flattened schedule. to_json() already returns a JSON string, so
# the file holds that string JSON-encoded once more; kept as is so existing
# readers of the file keep working.
with open("mdf_hu_dest.json", 'w') as mdf_file:
    mdf_file.write(json.dumps(mdf.reset_index().to_json()))
len(mdf)
1572
# Distinct airlines and destination cities seen in the schedule, saved as
# JSON lists. Context managers close (and flush) each file.
airlines = set(mdf['Airline'])
cities = set(mdf['City'])
with open("cities_hu_dest.json", 'w') as cities_file:
    json.dump(list(cities), cities_file)
with open("airlines_hu_dest.json", 'w') as airlines_file:
    json.dump(list(airlines), airlines_file)
citycoords={}
for i in cities:
if i not in citycoords:
if i==u'Birmingham': z='Birmingham, UK'
elif i==u'Valencia': z='Valencia, Spain'
elif i==u'Naples': z='Naples, Italy'
elif i==u'St. Petersburg': z='St. Petersburg, Russia'
elif i==u'Bristol': z='Bristol, UK'
elif i==u'Victoria': z='Victoria, Seychelles'
elif i==u'Washington': z='Washington, DC'
elif i==u'Odessa': z='Odessa, Ukraine'
else: z=i
citycoords[i]=Geocoder(apik).geocode(z)
print i
Venice Baku Kiev Istanbul Paris Oslo Riga Basel Cluj-Napoca Luxembourg Billund Gothenburg Nurnberg Lisbon Lanzarote Malmo Birmingham Naples Vienna Edinburgh Rotterdam Geneva Nice Tenerife Moscow Thessaloniki Munich Glasgow Larnaca Berlin Liverpool Leeds Dortmund Catania Manchester East Midlands Brussels Pisa Minsk Eilat Porto Dubai Eindhoven Malaga Helsinki Kutaisi Lyon Hamburg Dublin Dusseldorf Barcelona Athens Stuttgart Alicante Bologna Stockholm Bristol Treviso Tel Aviv Reykjavik Frankfurt Las Palmas Bucharest Beijing Luqa Belgrade Doha Zurich Madrid Prague Sofia Algiers Karlsruhe/Baden-Baden Cologne Milan Rome London Fuerteventura Cairo Bari Amsterdam Copenhagen Hurghada Warsaw
# Reduce each geocoding result to the coordinates and country of its first
# hit and save the lookup as JSON. The context manager closes the file.
citysave = {
    city: {"coords": hits[0].coordinates,
           "country": hits[0].country}
    for city, hits in citycoords.items()
}
with open("citysave_hu_dest.json", 'w') as citysave_file:
    json.dump(citysave, citysave_file)