#!/usr/bin/env python
# coding: utf-8
"""Build per-year Formula One race calendars from Wikipedia's circuit list.

The script downloads the "List of Formula One circuits" page, geocodes every
circuit location, derives which circuits hosted a race in each season and
writes the result to ``calendar2018.json`` and ``circs2018.json``.

The code is written to run unchanged on Python 2.7 and Python 3.
"""

import json
import os
import re

import numpy as np
import pandas as pd

CIRCUITS_URL = 'http://en.wikipedia.org/wiki/List_of_Formula_One_circuits'

# NOTE(review): this key is committed in source control and should be rotated.
# Set GOOGLE_API_KEY in the environment to override it; the literal is kept
# only so that existing invocations keep working.
API_KEY = os.environ.get('GOOGLE_API_KEY',
                         'AIzaSyCJJD4hDxsENJOVohntPCqgvsuvQ-yRgLY')

# Season tokens are separated by commas and/or whitespace.
SEASON_SEPARATOR = re.compile(r'[,\s]+')


def parse_seasons(races):
    """Return the ``(first, last)`` year spans encoded in a seasons string.

    ``races`` looks like ``'1950-1960, 2000-2007'``; en dashes are accepted in
    place of hyphens.  A single year ``Y`` is returned as ``(Y, Y)``.  Empty
    tokens produced by repeated separators are ignored.

    Raises:
        ValueError: if a token is not a year or a ``first-last`` pair.
    """
    spans = []
    normalized = races.replace(u'\u2013', '-')
    for token in SEASON_SEPARATOR.split(normalized):
        if not token:
            continue
        first, dash, last = token.partition('-')
        if dash:
            spans.append((int(first), int(last)))
        else:
            spans.append((int(first), int(first)))
    return spans


def build_calendar(circs, first_year=1950, end_year=2018):
    """Map every season to the indices of the circuits that hosted a race.

    Args:
        circs: sequence of dicts, each with a ``'races'`` seasons string.
        first_year: first season in the calendar (inclusive).
        end_year: end of the calendar (exclusive).

    Returns:
        dict mapping each year in ``range(first_year, end_year)`` to a list
        of indices into ``circs``, in circuit order.

    Raises:
        KeyError: if a circuit lists a season outside the calendar range.
    """
    calendar = {year: [] for year in range(first_year, end_year)}
    for index, circuit in enumerate(circs):
        for first, last in parse_seasons(circuit['races']):
            for year in range(first, last + 1):
                calendar[year].append(index)
    return calendar


def collect_circuits(table, geocode):
    """Turn the circuit table into a list of circuit dicts.

    Args:
        table: DataFrame with ``Circuit``, ``Location``, ``Season(s)`` and
            ``Map`` columns.
        geocode: callable taking a location string and returning coordinates.

    Returns:
        list of dicts with ``name``, ``races``, ``place`` and ``coord`` keys.

    A row whose ``Season(s)`` cell is missing is treated as a continuation of
    the previous circuit: the text found in its ``Map`` cell is appended to
    that circuit's ``races`` string.  (Presumably an artifact of how the HTML
    table is parsed -- verify against the live page.)
    """
    circs = []
    for _, row in table.iterrows():
        seasons = row['Season(s)']
        if pd.isnull(seasons):
            circs[-1]['races'] = circs[-1]['races'] + ', ' + row['Map']
            continue
        place = row['Location']
        circs.append({
            'name': row['Circuit'],
            'races': seasons,
            'place': place,
            'coord': geocode(place),
        })
        print(circs[-1])
    return circs


def main():
    """Download, geocode and save the circuit and calendar data."""
    # Imported here so the pure helpers above stay importable without the
    # third-party geocoding package installed.
    from pygeocoder import Geocoder

    # load list of all circuits and years with races from wikipedia
    tables = pd.read_html(CIRCUITS_URL, header=0)
    # The circuit table is taken to be the third table on the page.
    table = tables[2][['Circuit', 'Location', 'Season(s)', 'Map']]

    # geocode circuit names and create list with circuits
    geocoder = Geocoder(API_KEY)
    circs = collect_circuits(
        table, lambda place: geocoder.geocode(place).coordinates)

    # manual fixes
    circs[41]['races'] = '1950-1960, 2000-2007'

    calendar = build_calendar(circs)

    # save data
    with open('calendar2018.json', 'w') as out:
        out.write(json.dumps(calendar))
    with open('circs2018.json', 'w') as out:
        out.write(json.dumps(circs))


if __name__ == '__main__':
    main()