#!/usr/bin/env python # coding: utf-8 # In[34]: import pandas as pd, json, numpy as np import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') # In[35]: from pygeocoder import Geocoder apik='AIzaSyDybC2OroTE_XDJTuxjKruxFpby5VDhEGk' # In[73]: locations=json.loads(file('locations_hu.json','r').read()) # In[74]: mdf_dest=pd.read_json(json.loads(file('mdf_hu_dest.json','r').read())) mdf_arrv=pd.read_json(json.loads(file('mdf_hu_arrv.json','r').read())) # In[75]: citysave_dest=json.loads(file('citysave_hu_dest.json','r').read()) citysave_arrv=json.loads(file('citysave_hu_arrv.json','r').read()) # In[76]: mdf_dest['ID']=mdf_dest['From'] mdf_dest.head() # In[77]: mdf_arrv['ID']=mdf_arrv['To'] mdf_arrv.head() # In[78]: mdf=pd.concat([mdf_dest,mdf_arrv]) # In[79]: len(mdf_dest) # In[80]: len(mdf_arrv) # In[81]: mdf # In[82]: mdg=mdf.set_index(['ID','City','Airport','Airline']) # In[83]: k=mdg.loc['BUD'].loc['Frankfurt'].loc['FRA'] testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/departures/20170318' k[k['Date']==testurl] # In[84]: k=mdg.loc['BUD'].loc['Frankfurt'].loc['FRA'] testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/arrivals/20170318' k[k['Date']==testurl] # In[85]: k=mdg.loc['BUD'].loc['Frankfurt'].loc['FRA'] for i in range(11,25): testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/departures/201703'+str(i) print 'BUD-FRA March',i, 'departures',len(k[k['Date']==testurl]), testurl=u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/arrivals/201703'+str(i) print 'arrivals', len(k[k['Date']==testurl]) # In[88]: len(k)/14 # `mdg` checks out with source # In[89]: flights={} minn=1.0 #want to see minimum 1 flight in the past 2 weeks for i in mdg.index.get_level_values(0).unique(): #2 weeks downloaded. want to get weekly freq. but multi by 2 dept+arrv d=4.0 if i not in flights:flights[i]={} for j in mdg.loc[i].index.get_level_values(0).unique(): if len(mdg.loc[i].loc[j])>minn: #minimum 1 flights required in this period at least once every 2 weeks if j not in flights[i]:flights[i][j]={'airports':{},'7freq':0} flights[i][j]['7freq']=len(mdg.loc[i].loc[j])/d for k in mdg.loc[i].loc[j].index.get_level_values(0).unique(): if len(mdg.loc[i].loc[j].loc[k])>minn: if k not in flights[i][j]['airports']:flights[i][j]['airports'][k]={'airlines':{},'7freq':0} flights[i][j]['airports'][k]['7freq']=len(mdg.loc[i].loc[j].loc[k])/d for l in mdg.loc[i].loc[j].loc[k].index.get_level_values(0).unique(): try: if len(mdg.loc[i].loc[j].loc[k].loc[l])>minn: if l not in flights[i][j]['airports'][k]['airlines']:flights[i][j]['airports'][k]['airlines'][l]={'7freq':0} flights[i][j]['airports'][k]['airlines'][l]['7freq']=len(mdg.loc[i].loc[j].loc[k].loc[l])/d except:pass # In[90]: file("flights_hu.json",'w').write(json.dumps(flights))