#!/usr/bin/env python
# coding: utf-8
"""Build a Formula One race calendar from Wikipedia's list of circuits.

When executed as a script (or as a notebook cell) this module:

1. downloads the circuits table from Wikipedia,
2. geocodes every circuit location with ``pygeocoder``,
3. maps every season from 1950 to 2014 to the circuits that hosted a race,
4. writes ``calendar.json`` and ``circs.json`` to the current directory.

The network and file work only runs when the module is executed directly;
importing it merely defines the helpers below.
"""

import json

import numpy as np
import pandas as pd

# Wikipedia page holding the table of circuits.
CIRCUITS_URL = 'http://en.wikipedia.org/wiki/List_of_Formula_One_circuits'

# Position of the circuits table among the tables found on that page.
CIRCUITS_TABLE_INDEX = 3

# Columns that are dropped from the table before it is processed.
UNUSED_COLUMNS = ['Direction', 'Type', 'Grands Prix held', 'Map',
                  'Current Length', 'Grands Prix']

# Inclusive range of seasons pre-populated in the calendar.
FIRST_SEASON = 1950
LAST_SEASON = 2014


def load_circuits_table():
    """Download the circuits table and return it without the unused columns.

    Returns:
        A ``pandas.DataFrame`` whose first row of the HTML table is used as
        the header and from which ``UNUSED_COLUMNS`` have been removed.
    """
    # NOTE: the original call also passed ``infer_types=False``; current
    # pandas releases no longer accept that keyword, so it is omitted.
    tables = pd.read_html(CIRCUITS_URL, header=0)
    return tables[CIRCUITS_TABLE_INDEX].drop(UNUSED_COLUMNS, axis=1)


def geocode_circuits(df):
    """Turn each row of *df* into a circuit dict with geocoded coordinates.

    The first three columns of *df* are read by position as the circuit
    name, the place that is geocoded, and the seasons string.

    Args:
        df: table as returned by ``load_circuits_table``.

    Returns:
        A list of dicts with the keys ``name``, ``coord``, ``races`` and
        ``place``, in row order.  ``coord`` is whatever
        ``Geocoder.geocode(place).coordinates`` returns (presumably a
        latitude/longitude pair -- confirm against pygeocoder).
    """
    # Imported here so the pure helpers stay usable without pygeocoder.
    from pygeocoder import Geocoder

    circs = []
    for _, row in df.iterrows():
        place = row.iloc[1]
        circs.append({
            'name': row.iloc[0],
            'coord': Geocoder.geocode(place).coordinates,
            # Normalise en dashes so ranges can be split on a plain hyphen.
            'races': row.iloc[2].replace(u'\u2013', '-'),
            'place': place,
        })
    return circs


def expand_seasons(races):
    """Expand a seasons string into the list of individual years.

    Entries are separated by commas and/or whitespace; each entry is either
    a single year (``'1959'``) or an inclusive range (``'1950-1952'``).
    Empty entries produced by stray separators are ignored.

    Args:
        races: seasons string, e.g. ``'1950-1952, 1960'``.

    Returns:
        List of ``int`` years in the order they appear in *races*.

    Raises:
        ValueError: if an entry is not an integer or an integer range.
    """
    years = []
    # Splitting on whitespace after turning commas into spaces never yields
    # empty tokens, unlike splitting on ',' directly.
    for token in races.replace(',', ' ').split():
        first, dash, last = token.partition('-')
        if dash:
            years.extend(range(int(first), int(last) + 1))
        else:
            years.append(int(first))
    return years


def build_calendar(circs, first_year=FIRST_SEASON, last_year=LAST_SEASON):
    """Map every season to the indices of the circuits raced that year.

    Args:
        circs: sequence of circuit dicts, each with a ``'races'`` seasons
            string understood by ``expand_seasons``.
        first_year: first season in the calendar (inclusive).
        last_year: last season in the calendar (inclusive).

    Returns:
        Dict mapping each year in ``[first_year, last_year]`` to a list of
        indices into *circs*; seasons without races map to an empty list.

    Raises:
        KeyError: if a circuit lists a season outside the calendar range.
    """
    calendar = {year: [] for year in range(first_year, last_year + 1)}
    for index, circ in enumerate(circs):
        for year in expand_seasons(circ['races']):
            calendar[year].append(index)
    return calendar


def write_json(path, data):
    """Serialise *data* as JSON into the file at *path*, closing it after."""
    with open(path, 'w') as handle:
        handle.write(json.dumps(data))


if __name__ == '__main__':
    # load list of all circuits and years with races from wikipedia
    df = load_circuits_table()

    # geocode circuit names and create list with circuits
    circs = geocode_circuits(df)

    calendar = build_calendar(circs)

    # save data
    write_json('calendar.json', calendar)
    write_json('circs.json', circs)