In [1]:
import json
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
In [2]:
# Scrape the Wikipedia list of Hungarian airports; only the first table on the page is used.
url='http://en.wikipedia.org/wiki/List_of_airports_in_Hungary'
wiki_tables=pd.read_html(url)
# Keep the rows up to and including label 6 of that table.
head=wiki_tables[0].loc[:6]
# Promote row 0 to the column header: transpose, index on it, transpose back.
head=head.T.set_index(0).T
# Drop the rows before label 2 and key the remaining airports by IATA code.
df=head.loc[2:].set_index('IATA')
In [3]:
# Show the airports table (indexed by IATA code) built in the previous cell.
df
Out[3]:
Location served County ICAO Airport name Elev. Runways
IATA
BUD Budapest (Capital) LHBP Budapest Ferenc Liszt International Airport 151 m (495 ft) 3010 m x 59 m 3707 x 59 m
DEB Debrecen Hajdú-Bihar LHDC Debrecen International Airport 109 m (359 ft) 2498 m x 40 m
SOB Sármellék Zala LHSM Hévíz-Balaton Airport 124 m (408 ft) 2500 x 60 m
QGY Győr-Pér Győr-Moson-Sopron LHPR Győr-Pér International Airport 129 m (424 ft) 2030 x 30 m 1134 x 43 m
QPJ Pécs-Pogány Baranya LHPP Pécs-Pogány International Airport 305 m (1000 ft) 1500 x 30 m
In [4]:
from pygeocoder import Geocoder
# Read the Google geocoding API key from the environment instead of hardcoding
# it: a key committed with the notebook is visible to every reader and should be
# treated as leaked (revoke the old one). apik is None when GOOGLE_API_KEY is unset.
# NOTE(review): confirm Geocoder accepts a None key, or always export the variable.
apik=os.environ.get('GOOGLE_API_KEY')
In [5]:
locations={}
for i in df.index:
    results = Geocoder(apik).geocode(i+' airport Hungary')
    locations[i]=results[0].coordinates
    print i
BUD
DEB
SOB
QGY
QPJ
In [6]:
# Cache the geocoded airport coordinates on disk. The with-block closes the
# handle deterministically, so the data is flushed before the file is read back.
with open("locations_hu.json",'w') as f:
    f.write(json.dumps(locations))
In [7]:
# Reload the cached airport coordinates; the with-block closes the handle
# instead of leaving it open until garbage collection.
with open('locations_hu.json','r') as f:
    locations=json.loads(f.read())
In [8]:
import requests
In [9]:
# Inspect the loop variable left over from an earlier loop (its last value).
# NOTE(review): relies on leftover kernel state; presumably a debugging check.
i
Out[9]:
u'QPJ'
In [10]:
airportialinks={}
for i in locations:
    print i,
    if i=='QPJ': url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+'PEV'+'+airport+hungary'
    else: url='https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='+str(i)+'+airport+hungary'
    m=requests.get(url).content
    z=pd.read_html(m)[5][0][0]
    z=z[z.find('http'):]
    airportialinks[i]=z
    print z
QPJ https://www.airportia.com/hungary/pécs_pogány-airport/map/
DEB https://www.airportia.com/hungary/debrecen-international-airport
SOB https://www.airportia.com/hungary/sármellék...airport/arrivals
BUD https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport
QGY https://www.airportia.com/hungary/győr_pér...airport/photos
In [11]:
#reformat
for z in airportialinks:
    airportialinks[z]=airportialinks[z].split('arrivals')[0].split('departures')[0].replace(' ','').replace('...','-international-')
    if airportialinks[z][-1]!='/':airportialinks[z]+='/' 
    #manual fixes
    if z=='QGY':airportialinks[z]=u'https://www.airportia.com/hungary/győr_pér-international-airport/'
    print airportialinks[z]
https://www.airportia.com/hungary/pécs_pogány-airport/map/
https://www.airportia.com/hungary/debrecen-international-airport/
https://www.airportia.com/hungary/sármellék-international-airport/
https://www.airportia.com/hungary/győr_pér-international-airport/
https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/
In [12]:
# Schedule cache: airport code -> {departures-page URL -> scraped table}.
sch=dict()

Record schedules for 2 weeks, then augment the counts with weekly flight numbers. Seasonal and seasonal charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is handled separately, since its history is in the past.

In [13]:
for i in locations:
    print i
    if i not in sch:sch[i]={}
    #march 11-24 = 2 weeks
    for d in range (11,25):
        if d not in sch[i]:
            try:
                url=airportialinks[i]
                full=url+'departures/201703'+str(d)
                m=requests.get(full).content
                sch[i][full]=pd.read_html(m)[0]
                #print full
            except: pass #print 'no tables',i,d
QPJ
DEB
SOB
BUD
QGY
In [57]:
for i in range(11,25):
    testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/departures/201703'+str(i)
    print 'nr. of flights on March',i,':',len(sch['BUD'][testurl])
testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/departures/20170318'
k=sch['BUD'][testurl]
k[k['To']=='Frankfurt FRA']
nr. of flights on March 11 : 89
nr. of flights on March 12 : 115
nr. of flights on March 13 : 122
nr. of flights on March 14 : 101
nr. of flights on March 15 : 108
nr. of flights on March 16 : 107
nr. of flights on March 17 : 123
nr. of flights on March 18 : 87
nr. of flights on March 19 : 118
nr. of flights on March 20 : 124
nr. of flights on March 21 : 103
nr. of flights on March 22 : 110
nr. of flights on March 23 : 109
nr. of flights on March 24 : 124
Out[57]:
Flight To Airline Scheduled Departure Status Unnamed: 6
12 LH1343 Frankfurt FRA Lufthansa 06:30 NaN Scheduled Track >
30 LH1335 Frankfurt FRA Lufthansa 10:45 10:50 Landed Track >
57 LH1339 Frankfurt FRA Lufthansa 14:35 NaN Scheduled Track >
79 LH1341 Frankfurt FRA Lufthansa 18:45 18:57 Landed Track >

sch checks out with source

In [38]:
# Start from an empty frame; the scraped flight tables are concatenated onto it below.
mdf=pd.DataFrame()
In [39]:
# Flatten the per-airport, per-page tables in sch into the single frame mdf.
frames=[]
for i in sch:
    for d in sch[i]:
        table=sch[i][d]
        # Drop the first column and everything from the fourth on, leaving columns 1-2.
        # A separate name is used so the airports table held in df is not overwritten.
        flights=table.drop(table.columns[3:],axis=1).drop(table.columns[0],axis=1)
        flights['From']=i   # origin airport code
        flights['Date']=d   # key of the table in sch (the departures-page URL)
        frames.append(flights)
# Concatenate once: calling pd.concat inside the loop re-copies every accumulated
# row on each pass. Starting from mdf keeps whatever it already contained.
mdf=pd.concat([mdf]+frames)
In [40]:
# Relabel Hahn as Frankfurt: cells equal to one of the keys are swapped for the mapped value.
hahn_aliases={'Hahn':'Frankfurt','Hahn HHN':'Frankfurt HHN'}
mdf=mdf.replace(hahn_aliases)
In [41]:
def _split_destination(dest):
    """Split a 'City Name CODE' destination into (city, airport code).

    The text after the last space is the airport code and everything before it
    is the city. A destination without any space is returned unchanged as both
    parts; slicing on rfind's -1 would silently cut off the city's last character.
    """
    city,sep,code=dest.rpartition(' ')
    if not sep:
        return dest,dest
    return city,code

# Derive the City and Airport columns from the 'To' column.
dest_parts=[_split_destination(t) for t in mdf['To']]
mdf['City']=[p[0] for p in dest_parts]
mdf['Airport']=[p[1] for p in dest_parts]
In [48]:
# Cross-check mdf against the raw table: same page (testurl), same destination.
on_test_page=mdf['Date']==testurl
k=mdf.loc[on_test_page]
k.loc[k['To']=='Frankfurt FRA']
Out[48]:
To Airline From Date City Airport
12 Frankfurt FRA Lufthansa BUD https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA
30 Frankfurt FRA Lufthansa BUD https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA
57 Frankfurt FRA Lufthansa BUD https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA
79 Frankfurt FRA Lufthansa BUD https://www.airportia.com/hungary/budapest-lis... Frankfurt FRA

mdf checks out with source

In [58]:
# Persist the flattened flight table; the with-block closes the file handle.
# NOTE(review): to_json() already returns a JSON string, so json.dumps() adds a
# second layer of encoding. Left unchanged because readers of this file may expect it.
with open("mdf_hu_dest.json",'w') as f:
    f.write(json.dumps(mdf.reset_index().to_json()))
In [71]:
# Number of rows in the combined flight table.
mdf.shape[0]
Out[71]:
1572
In [72]:
# Distinct values of the Airline column.
airlines={name for name in mdf['Airline']}
In [73]:
# Distinct values of the City column.
cities={name for name in mdf['City']}
In [74]:
# Save the distinct cities and airlines as JSON lists; each with-block closes
# its file handle instead of leaving it open until garbage collection.
with open("cities_hu_dest.json",'w') as f:
    f.write(json.dumps(list(cities)))
with open("airlines_hu_dest.json",'w') as f:
    f.write(json.dumps(list(airlines)))
In [75]:
# Geocoding cache: city name -> geocoder result.
citycoords=dict()
In [76]:
for i in cities:
    if i not in citycoords:
        if i==u'Birmingham': z='Birmingham, UK'
        elif i==u'Valencia': z='Valencia, Spain'
        elif i==u'Naples': z='Naples, Italy'
        elif i==u'St. Petersburg': z='St. Petersburg, Russia'
        elif i==u'Bristol': z='Bristol, UK'
        elif i==u'Victoria': z='Victoria, Seychelles'
        elif i==u'Washington': z='Washington, DC'
        elif i==u'Odessa': z='Odessa, Ukraine'
        else: z=i
        citycoords[i]=Geocoder(apik).geocode(z)
        print i
Venice
Baku
Kiev
Istanbul
Paris
Oslo
Riga
Basel
Cluj-Napoca
Luxembourg
Billund
Gothenburg
Nurnberg
Lisbon
Lanzarote
Malmo
Birmingham
Naples
Vienna
Edinburgh
Rotterdam
Geneva
Nice
Tenerife
Moscow
Thessaloniki
Munich
Glasgow
Larnaca
Berlin
Liverpool
Leeds
Dortmund
Catania
Manchester
East Midlands
Brussels
Pisa
Minsk
Eilat
Porto
Dubai
Eindhoven
Malaga
Helsinki
Kutaisi
Lyon
Hamburg
Dublin
Dusseldorf
Barcelona
Athens
Stuttgart
Alicante
Bologna
Stockholm
Bristol
Treviso
Tel Aviv
Reykjavik
Frankfurt
Las Palmas
Bucharest
Beijing
Luqa
Belgrade
Doha
Zurich
Madrid
Prague
Sofia
Algiers
Karlsruhe/Baden-Baden
Cologne
Milan
Rome
London
Fuerteventura
Cairo
Bari
Amsterdam
Copenhagen
Hurghada
Warsaw
In [77]:
# Reduce each geocoder result to what gets saved: coordinates and country of the first hit.
citysave={}
for name,hits in citycoords.items():
    best=hits[0]
    citysave[name]={"coords":best.coordinates,"country":best.country}
In [78]:
# Write the per-city coordinates/country lookup to disk; the with-block closes the handle.
with open("citysave_hu_dest.json",'w') as f:
    f.write(json.dumps(citysave))