#!/usr/bin/env python
# coding: utf-8
"""Build the WRC rally list and per-year calendar from Wikipedia.

Reads the rally table from the Wikipedia "List of World Rally Championship
rallies" page, geocodes each rally's headquarters, and writes two JSON files:

* ``circs_wrc2018.json``    -- list of dicts with keys ``name``, ``races``,
  ``place`` and ``coord``.
* ``calendar_wrc2018.json`` -- mapping of year (1973..2018) to the list of
  indices into the rally list for the rallies held that year.
"""

# In[41]:

import json
import os

import numpy as np
import pandas as pd
from pygeocoder import Geocoder


def _strip_footnote(text):
    """Return *text* truncated at its first '[' (footnote marker), if any."""
    return text.partition('[')[0]


# In[42]:

# Load the list of all rallies and the years they were held from Wikipedia.
# read_html returns one DataFrame per table found on the page.
df = pd.read_html(
    'https://en.wikipedia.org/wiki/List_of_World_Rally_Championship_rallies',
    header=0)

# In[44]:

# The second table on the page is the one used; keep only the needed columns.
df = df[1][['Rally', 'Headquarters', 'WRC years', 'Location']]

# In[47]:

# Geocode each rally's headquarters and build the list of rallies.
# NOTE(review): this key is committed in source; it should be rotated and
# supplied via the GOOGLE_API_KEY environment variable. The literal is kept
# only as a fallback so existing runs behave as before.
apikey = os.environ.get('GOOGLE_API_KEY',
                        'AIzaSyCJJD4hDxsENJOVohntPCqgvsuvQ-yRgLY')
geocoder = Geocoder(apikey)  # built once instead of once per row

circs = []
for _, row in df.iterrows():
    headquarters = _strip_footnote(row['Headquarters'])
    if headquarters == 'Gap':
        # Special case: 'Gap' is geocoded as 'Gap, France' rather than
        # with the Location column appended.
        place = 'Gap, France'
    else:
        place = headquarters + ', ' + row['Location']
    circ = {
        'name': _strip_footnote(row['Rally']),
        'races': _strip_footnote(row['WRC years']),
        'place': place,
    }
    circ['coord'] = geocoder.geocode(place).coordinates
    circs.append(circ)
    print(circs[-1])  # progress output, one line per geocoded rally

# In[48]:

# Map every season to the indices (into circs) of the rallies held that year.
# A year outside 1973..2018 in the source table raises KeyError.
calendar = {year: [] for year in range(1973, 2019)}
for index, circ in enumerate(circs):
    # Normalise "1973–1975, 1977 1979" style text into comma-separated
    # tokens: en dashes become hyphens, spaces become commas, and runs of
    # commas are collapsed.
    tokens = (circ['races']
              .replace(u'\u2013', '-')
              .strip()
              .replace(" ", ",")
              .replace(",,", ",")
              .replace(",,", ",")
              .split(","))
    for token in tokens:
        if not token:
            # Left over from stray separators; int('') would raise.
            continue
        first, dash, last = token.partition('-')
        if not dash:
            calendar[int(token)].append(index)
        else:
            # Inclusive range of years, e.g. "1973-1975".
            for year in range(int(first), int(last) + 1):
                calendar[year].append(index)

# In[49]:

# Save data. Context managers make sure both files are flushed and closed.
with open('calendar_wrc2018.json', 'w') as calendar_file:
    calendar_file.write(json.dumps(calendar))
with open('circs_wrc2018.json', 'w') as circs_file:
    circs_file.write(json.dumps(circs))

# In[ ]: