import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Scrape the Wikipedia list of Romanian airports and index it by IATA code.
url = 'http://en.wikipedia.org/wiki/List_of_airports_in_Romania'
tables = pd.read_html(url)
# Only the first table on the page is used, rows labelled up to 17.
raw = tables[0].loc[:17]
# Promote row 0 to the column header (transpose, index by it, transpose back).
headed = raw.T.set_index(0).T
# Keep the rows labelled 2 and onwards, keyed by the 'IATA' column.
df = headed.loc[2:].set_index('IATA')
df
City served | ICAO | Airport name | Website | Frequency | Status | nan | |
---|---|---|---|---|---|---|---|
IATA | |||||||
ARW | Arad | LRAR | Arad International Airport | http://www.aeroportularad.ro | TWR 130.2 MHz | NaN | NaN |
BCM | Bacău | LRBC | Bacău "George Enescu" International Airport [14] | http://www.bacauairport.ro | TWR 118.6 MHz | NaN | NaN |
BAY | Baia Mare / Tăuții-Măgherăuș | LRBM | Baia Mare Airport (Tăuții-Măgherăuș Airport) | http://www.baiamareairport.ro | TWR 123.6 MHz | Closed for renovation | NaN |
BBU | Bucharest / Băneasa | LRBS | Bucharest "Aurel Vlaicu" International Airport... | http://www.baneasa.aero | APP 127.6 MHz TWR 120.8 MHz | Only private flights | NaN |
OTP | Bucharest / Otopeni | LROP | Bucharest "Henri Coandǎ" International Airport... | http://www.otp-airport.ro | APP 126.2 TWR1 120.9 TWR2 121.85 | NaN | NaN |
CLJ | Cluj-Napoca | LRCL | Cluj "Avram Iancu" International Airport | http://www.airportcluj.ro | APP 125.1 MHz TWR 134.4 MHz | NaN | NaN |
CND | Constanța | LRCK | Constanța "Mihail Kogălniceanu" International ... | http://www.mk-airport.ro | TWR 120.24 MHz | NaN | NaN |
CRA | Craiova | LRCV | Craiova Airport | http://www.aeroportcraiova.ro | TWR 124.3 MHz | NaN | NaN |
IAS | Iași | LRIA | Iași International Airport | http://www.aeroport.ro | TWR 119.2 MHz | NaN | NaN |
OMR | Oradea | LROD | Oradea International Airport | http://www.aeroportoradea.ro | TWR 120.2 MHz | NaN | NaN |
SUJ | Satu Mare | LRSM | Satu Mare International Airport | http://www.aeroportulsm.ro | TWR 118.8 MHz | NaN | NaN |
SBZ | Sibiu | LRSB | Sibiu International Airport | NaN | TWR 122.7 MHz | NaN | NaN |
SCV | Suceava | LRSV | Suceava "Ștefan cel Mare" International Airport | http://www.aeroportsuceava.ro | APP 120.9 MHz TWR 118.3 MHz | NaN | NaN |
TGM | Târgu Mureș | LRTM | "Transilvania" Târgu Mureș Airport | http://www.targumuresairport.ro | APP 121.9 MHz TWR 125.9 MHz | NaN | NaN |
TSR | Timișoara | LRTR | Timișoara "Traian Vuia" International Airport ... | http://www.aerotim.ro | TWR 101.1 MHz | NaN | NaN |
TCE | Tulcea | LRTC | "Delta Dunarii" Tulcea Airport (Cataloi Airport) | http://www.aeroportul-tulcea.ro/ | APP/TWR 120.3 MHz | Closed | NaN |
import os

from pygeocoder import Geocoder

# NOTE(review): a Google API key is committed in source. It is kept only as
# the fallback so existing runs behave exactly as before; prefer supplying
# the key through the GOOGLE_API_KEY environment variable and rotating this
# one.
apik = os.environ.get('GOOGLE_API_KEY',
                      'AIzaSyDybC2OroTE_XDJTuxjKruxFpby5VDhEGk')

# Geocode every airport in the table index and keep the coordinates of the
# first result.
locations = {}
geocoder = Geocoder(apik)  # one client for the whole loop
for i in df.index:
    results = geocoder.geocode(i + ' airport romania')
    locations[i] = results[0].coordinates
    print(i)  # progress; the single-argument form works on Python 2 and 3
ARW BCM BAY BBU OTP CLJ CND CRA IAS OMR SUJ SBZ SCV TGM TSR TCE
# Write the geocoded airport coordinates to disk, then reload them from that
# file. The context managers close (and flush) each handle before the next
# step; open() is used because the file() builtin exists only on Python 2.
with open("locations_ro.json", 'w') as fh:
    fh.write(json.dumps(locations))
with open('locations_ro.json', 'r') as fh:
    locations = json.loads(fh.read())
import requests

# Look up a link for each airport through a Google custom search and store
# it in `airportialinks`, keyed like `locations`.
airportialinks = {}
for i in locations:
    url = ('https://cse.google.com/cse'
           '?cx=partner-pub-6479063288582225%3A8064105798'
           '&cof=FORID%3A10&ie=UTF-8&q=' + str(i) + '+airport+romania')
    m = requests.get(url).content
    # The hit is read from column 0, row 0 of the sixth table on the page.
    z = pd.read_html(m)[5][0][0]
    start = z.find('http')
    if start == -1:
        # find() returns -1 when the cell holds no link; slicing with it
        # would silently store the cell's last character as the "URL".
        print('%s no link found' % i)
        continue
    z = z[start:]
    airportialinks[i] = z
    # One formatted print gives the same "<code> <link>" line on Python 2
    # and 3.
    print('%s %s' % (i, z))
BCM https://www.airportia.com/romania/bacău-airport/ SCV https://www.airportia.com/romania/suceava-stefan-cel-mare-airport CLJ https://www.airportia.com/romania/cluj_napoca-international-airport ARW https://www.airportia.com/romania/arad-international-airport/arrivals SBZ https://www.airportia.com/romania/sibiu-international-airport/ SUJ https://www.airportia.com/romania/satu-mare-airport/arrivals BAY https://www.airportia.com/romania/tautii-magheraus-airport OMR https://www.airportia.com/romania/oradea-international-airport/ CND https://www.airportia.com/romania/mihail-kogălniceanu-international-airport CRA https://www.airportia.com/romania/craiova-airport/arrivals OTP https://www.airportia.com/romania/henri-coandă-international-airport BBU https://www.airportia.com/romania/băneasa...airport/departures TCE https://www.airportia.com/romania/tulcea-airport TSR https://www.airportia.com/romania/timişoara-traian-vuia-airport/ IAS https://www.airportia.com/romania/iaşi-airport/arrivals TGM https://www.airportia.com/romania/transilvania-târgu-mureş-international- airport
# reformat: cut everything from 'arrivals'/'departures' onwards, remove
# spaces, expand '...' and make sure every link ends with a slash
for z in airportialinks:
    link = airportialinks[z].split('arrivals')[0].split('departures')[0]
    link = link.replace(' ', '').replace('...', '-international-')
    # endswith() also copes with an empty string, where [-1] would raise
    if not link.endswith('/'):
        link += '/'
    # manual fixes
    if z == 'TSR':
        link = 'https://www.airportia.com/romania/timişoara-traian-vuia-airport/'
    airportialinks[z] = link
    print(link)  # single-argument form works on Python 2 and 3
https://www.airportia.com/romania/bacău-airport/ https://www.airportia.com/romania/tulcea-airport/ https://www.airportia.com/romania/cluj_napoca-international-airport/ https://www.airportia.com/romania/arad-international-airport/ https://www.airportia.com/romania/sibiu-international-airport/ https://www.airportia.com/romania/satu-mare-airport/ https://www.airportia.com/romania/tautii-magheraus-airport/ https://www.airportia.com/romania/oradea-international-airport/ https://www.airportia.com/romania/mihail-kogălniceanu-international-airport/ https://www.airportia.com/romania/craiova-airport/ https://www.airportia.com/romania/henri-coandă-international-airport/ https://www.airportia.com/romania/băneasa-international-airport/ https://www.airportia.com/romania/suceava-stefan-cel-mare-airport/ https://www.airportia.com/romania/timişoara-traian-vuia-airport/ https://www.airportia.com/romania/iaşi-airport/ https://www.airportia.com/romania/transilvania-târgu-mureş-international-airport/
# Scraped departure tables: sch[airport code][schedule URL] -> table
sch={}
Record schedules for 2 weeks, then augment the counts with weekly flight numbers. Seasonal and seasonal-charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is handled separately, since its schedule history is in the past.
# Download the daily departure tables of every airport into `sch`.
for i in locations:
    print(i)  # progress; the single-argument form works on Python 2 and 3
    if i not in sch:
        sch[i] = {}
    if i != 'TGM':
        # march 11-24 = 2 weeks
        month, days = '201703', range(11, 25)
    else:
        # november 17-30 = 2 weeks
        month, days = '201611', range(17, 31)
    url = airportialinks.get(i)
    if url is None:
        # no link is known for this airport, so there is nothing to fetch
        continue
    for d in days:
        full = url + 'departures/' + month + str(d)
        # Tables are stored under their URL, so the "already fetched" check
        # must use the URL as well; testing the bare day number never
        # matched and every re-run downloaded everything again.
        if full in sch[i]:
            continue
        try:
            m = requests.get(full).content
            sch[i][full] = pd.read_html(m)[0]
        except Exception:
            # Best effort: a day whose page cannot be fetched or parsed into
            # a table is skipped. Unlike a bare `except:`, this still lets
            # KeyboardInterrupt and SystemExit through.
            pass
BCM SCV CLJ ARW SBZ SUJ BAY OMR CND CRA OTP BBU TCE TSR IAS TGM
# Stack all scraped tables into one frame, tagged with their origin airport
# and the key (schedule URL) they were stored under.
frames = []
for i in sch:
    for d in sch[i]:
        table = sch[i][d]
        # keep only the second and third columns of the scraped table
        df = table.drop(table.columns[3:], axis=1).drop(table.columns[0], axis=1)
        df['From'] = i
        df['Date'] = d
        frames.append(df)
# A single concat at the end avoids re-copying the growing frame for every
# table; pd.concat() rejects an empty list, hence the fallback.
mdf = pd.concat(frames) if frames else pd.DataFrame()
mdf = mdf.replace('Hahn', 'Frankfurt')
mdf = mdf.replace('Hahn HHN', 'Frankfurt HHN')
# Split 'To' at its last space: everything before goes to 'City', the rest
# to 'Airport' (presumably "<city> <airport code>" -- confirm on the data).
mdf['City'] = [dest[:dest.rfind(' ')] for dest in mdf['To']]
mdf['Airport'] = [dest[dest.rfind(' ') + 1:] for dest in mdf['To']]
# NOTE(review): to_json() already returns a JSON string, so json.dumps()
# encodes it a second time. Kept as is, because whatever reads this file
# presumably decodes twice -- confirm before simplifying.
with open("mdf_ro_dest.json", 'w') as fh:
    fh.write(json.dumps(mdf.reset_index().to_json()))
len(mdf)
3034
# Distinct values of the 'Airline' and 'City' columns, saved as JSON lists.
airlines = set(mdf['Airline'])
cities = set(mdf['City'])
# Context managers close the handles; file() exists only on Python 2.
with open("cities_ro_dest.json", 'w') as fh:
    fh.write(json.dumps(list(cities)))
with open("airlines_ro_dest.json", 'w') as fh:
    fh.write(json.dumps(list(airlines)))
# Geocode every destination city. A few names are queried with an explicit
# country appended; all others are sent to the geocoder unchanged.
qualified = {
    u'Birmingham': 'Birmingham, UK',
    u'Valencia': 'Valencia, Spain',
    u'Naples': 'Naples, Italy',
    u'St. Petersburg': 'St. Petersburg, Russia',
    u'Bristol': 'Bristol, UK',
}
citycoords = {}
geocoder = Geocoder(apik)  # one client for the whole loop
for i in cities:
    if i not in citycoords:
        z = qualified.get(i, i)
        citycoords[i] = geocoder.geocode(z)
        print(i)  # progress; single-argument form works on Python 2 and 3
Kiev Paris Oslo Basel Beirut Zaragoza Liverpool Verona Malmo Castellon de la Plana Bologna Catania Treviso Brussels Bucharest Dubai Dublin Rome Varna Luqa Pescara Cologne Milan London Karlsruhe/Baden-Baden Strasbourg Dortmund Cluj-Napoca Nurnberg Amman Chisinau Vienna Moscow Bratislava Berlin Katowice Weeze Eindhoven Stuttgart Alicante Tenerife Frankfurt Thessaloniki Zurich Perugia Madrid Bari Doncaster Lyon Istanbul Pisa Turin Nice Larnaca Memmingen Hannover Malaga Hamburg Stockholm Tel Aviv Timisoara Doha Birmingham Florence Athens Satu Mare Oradea Valencia Naples Geneva Sibiu Munich Glasgow Alghero Budapest Dusseldorf Barcelona Billund Bristol Iasi Belgrade Prague Sofia Suceava Lisbon Amsterdam Copenhagen Warsaw
# Keep the coordinates and country of the first geocoding result per city
# and save them as JSON.
citysave = {}
for i in citycoords:
    best = citycoords[i][0]  # first result, looked up once
    citysave[i] = {"coords": best.coordinates,
                   "country": best.country}
# Context manager closes the handle; file() exists only on Python 2.
with open("citysave_ro_dest.json", 'w') as fh:
    fh.write(json.dumps(citysave))