#Grab the data as parsed XML elements in a list
race='data/F1 Race.txt'
from lxml import etree
# Each line of the log is parsed on its own as one XML element.
# The context manager guarantees the file handle is closed, and blank
# lines are skipped because they cannot be parsed as XML.
with open(race, 'r') as race_file:
    pl = [etree.fromstring(line) for line in race_file if line.strip()]
#Column mappings for the timing screen
# Timing-screen column number (as a string) -> field name.
raceMap = dict([
    ('1', 'classpos'),
    ('2', 'racingNumber'),
    ('3', 'name'),
    ('4', 'gap'),
    ('5', 'interval'),
    ('6', 'laptime'),
    ('7', 'sector1'),
    ('9', 'sector2'),
    ('11', 'sector3'),
    ('12', 'pitlap'),
    ('13', 'pitcount'),
])
# Reverse lookup: field name -> column number.
raceUnMap = dict((field, column) for column, field in raceMap.items())
raceUnMap
#Make use of row semantics (row is classification position)
#Generate a classification position/time datastructure for each driverNumber
def parse_race_pos_all(r,pos,c):
    '''Append one timing update to ``pos`` when it targets column ``c``.

    r is data element; pos is dataframe; c is column (a key of ``raceMap``).

    Only elements whose ``identifier`` is '101' and whose first child is not
    a session-state message are considered. A matching update adds one row,
    indexed by the message time, holding the cell value, its colour and the
    row it was written to (stored as "pos").

    Returns the extended dataframe, or ``pos`` unchanged when the element
    does not match. Raises KeyError if ``c`` is not a key of ``raceMap``.
    '''
    # Imported here because the file never imports datetime at module level.
    import datetime

    if r.attrib['identifier']=='101' and 'sessionstate' not in r[0].attrib:
        cell=r[0].attrib
        cn=raceMap[c]
        if cell['column']==c:
            # Timestamp is split on ':' and '.' into hour, minute, second, millisecond.
            tt=r.attrib['timestamp'].replace('.',':').split(':')
            # Milliseconds are scaled to the microseconds datetime.time expects.
            ttx=datetime.time(int(tt[0]),int(tt[1]),int(tt[2]),int(tt[3])*1000)
            update=pd.DataFrame({
                cn:cell['value'],
                cn+"_colour":cell['colour'],
                "pos":cell['row']
            },index=[ttx])
            pos=pd.concat([pos,update])
    return pos
#This is a bit quirkier - driverNumber over time for a particular classification number
def parse_race_pos_num(r,pos,c,p):
    '''Append one timing update to ``pos`` when it targets column ``c`` of row ``p``.

    r is data element; pos is dataframe; c is column (a key of ``raceMap``);
    p is position we're interested in (compared against the cell's ``row``).

    Only elements whose ``identifier`` is '101' and whose first child is not
    a session-state message are considered. A matching update adds one row,
    indexed by the message time, holding the cell value and its colour.

    Returns the extended dataframe, or ``pos`` unchanged when the element
    does not match. Raises KeyError if ``c`` is not a key of ``raceMap``.
    '''
    # Imported here because the file never imports datetime at module level.
    import datetime

    if r.attrib['identifier']=='101' and 'sessionstate' not in r[0].attrib:
        cell=r[0].attrib
        cn=raceMap[c]
        if cell['column']==c and cell['row']==p:
            # Timestamp is split on ':' and '.' into hour, minute, second, millisecond.
            tt=r.attrib['timestamp'].replace('.',':').split(':')
            # Milliseconds are scaled to the microseconds datetime.time expects.
            ttx=datetime.time(int(tt[0]),int(tt[1]),int(tt[2]),int(tt[3])*1000)
            update=pd.DataFrame({cn:cell['value'], cn+"_colour":cell['colour']},index=[ttx])
            pos=pd.concat([pos,update])
    return pos
import pandas as pd
# Collect every racing-number cell update into one dataframe.
pnum=pd.DataFrame()
racing_number_column=raceUnMap['racingNumber']
for elem in pl:
    pnum=parse_race_pos_all(elem,pnum,racing_number_column)
# Expose the time index as an ordinary column so it can be plotted.
pnum['time'] = pnum.index
pnum['pos']=pnum['pos'].astype(int)
# Discard updates whose racing-number value is empty.
pnum=pnum[pnum.racingNumber!='']
pnum[:3]
from ggplot import *
# Row ("pos") against time, one coloured line per racing number; rows 23 and above are left out.
(ggplot(pnum[pnum.pos<23],aes(x='time',y='pos',colour='racingNumber'))
 + geom_line()
 + ylim(0,23))
# Flatten every timing-screen cell update into a dict ready for storage.
plj=[]
with open(race, 'r') as race_file:
    for el in race_file:
        r=etree.fromstring(el)
        # Merge the element's attributes with those of its first child.
        d=dict(r.attrib)
        d.update(r[0].attrib)
        # Keep only '101' cell updates for columns that raceMap knows about.
        if d['identifier']=='101' and 'row' in d and d.get('column') in raceMap:
            cn=raceMap[d['column']]
            # The id joins the row and the timestamp, so updates to one row
            # at the same instant share an id.
            did= '_'.join([ d['row'],d['timestamp'].replace(':','_').replace('.','_') ])
            dd={cn:d['value'], cn+"_colour":d['colour'],'id':did,'row': d['row'],'strtime':d['timestamp']}
            plj.append(dd)
plj[:3]
import pymongo
from pymongo import MongoClient
import json
#Connect to the MongoDB server
#Connect to the MongoDB server
conn = MongoClient('localhost', 27017)
#Create a new database
db = conn.tataracedb
# Start from an empty collection on every run.
db.drop_collection('test1')
#Create a new collection
collection = db.test1
collection
# Upsert keyed on the row/timestamp id: records sharing an id are merged
# into one document by $set.
# NOTE(review): Collection.update is the legacy PyMongo call; newer PyMongo
# releases expect update_one - confirm the installed version before changing it.
for i in plj:
    collection.update({'_id':i['id']},{"$set":i},upsert=True)
collection.find_one()
# Documents whose racingNumber is '3' and that also carry a gap field.
racing3_gap_query = {'$and': [{'racingNumber': '3'}, {'gap': {'$exists': True}}]}
results = collection.find(racing3_gap_query)
results.count()
df = pd.DataFrame(list(results))
df[:3]
def timeify(x):
    '''Convert a timestamp string into a ``datetime.time``.

    x is a string with four numeric parts (hours, minutes, seconds,
    milliseconds) separated by ':', '.' or '_', e.g. '14:34:21.752'.
    Raises ValueError/IndexError if the string does not have that shape.
    '''
    # Imported here because the file never imports datetime at module level.
    import datetime

    tt=x.replace(':','_').replace('.','_').split('_')
    # The fourth part is milliseconds; datetime.time takes microseconds.
    return datetime.time(int(tt[0]),int(tt[1]),int(tt[2]),int(tt[3])*1000)
# Parse the stored timestamp strings so they can serve as the x axis.
df['time'] = df['strtime'].apply(timeify)
(ggplot(df, aes(x='time', y='gap'))
 + geom_line())
# Documents that carry a gap field and were written to rows 2 to 5.
results=collection.find({'$and': [
    #{ 'racingNumber': { '$exists': True } },
    { 'gap': { '$exists': True } },
    { '$or': [ {'row':'2'}, {'row':'3'},{'row':'4'},{'row':'5'}]}
    ]})
df=pd.DataFrame(list(results))
df['time']=df['strtime'].apply(timeify)
# Coerce the gap strings to numbers; values that are not numeric become NaN.
# pd.to_numeric replaces Series.convert_objects, which pandas has removed.
df['gap']=pd.to_numeric(df['gap'],errors='coerce')
#df.dropna(subset=['gap','racingNumber'],inplace=True)
ggplot(df,aes(x='time',y='gap',colour='row'))+geom_line()
df.gap.unique()
#results=collection.find({'$and': [{'row': '3'}, {'racingNumber': '3'}] },{'row':1,'strtime':1} )
# Every update written to row '3', projected down to the row and timestamp fields.
row3_filter = {'row': '3'}
row3_fields = {'row': 1, 'strtime': 1}
results = collection.find(row3_filter, row3_fields)
results.count()
df = pd.DataFrame(list(results))
df[:3]
def itimeify(x):
    '''Convert a timestamp string into seconds since midnight, as a float.

    x is a string with four numeric parts (hours, minutes, seconds,
    milliseconds) separated by '_', ':' or '.', e.g. '14_34_21_752'.
    Raises ValueError/IndexError if the string does not have that shape.
    '''
    tt=x.replace(':','_').replace('.','_').split('_')
    millis=int(tt[0])*3600000+int(tt[1])*60000+int(tt[2])*1000+int(tt[3])
    # Divide by a float so the millisecond part survives on Python 2 as well.
    return millis/1000.0
# The query that built this dataframe projects only row and strtime, so there
# is no 'time' column to read; both derived columns are computed from strtime.
# Separators are normalised to '_' before calling itimeify, which splits on '_'.
df['itime']=df['strtime'].apply(lambda x: itimeify(x.replace(':','_').replace('.','_')))
df['time']=df['strtime'].apply(timeify)
df[:3]
# One point per update along the time axis; the first 11 and last 5 rows are left out.
ggplot(df[11:-5], aes(x='itime',y=1))+geom_point()
#Is there anything we can do based around events on the same row that are within a short time of each other?
#If there is a position change, details will refer to what in the row? Just the new driver?
#Let's try to group close in time items...
#Based on http://stackoverflow.com/a/10017017/454773
d=list(df['itime'][11:-5])
# Differences within consecutive non-overlapping pairs (d[1]-d[0], d[3]-d[2], ...);
# only the commented-out average below would use them.
diff = [y - x for x, y in zip(*[iter(d)] * 2)]
#avg = sum(diff) / len(diff)
# Each group collects the times that fall within the threshold of the
# group's FIRST element. An empty selection yields no groups.
m = [[d[0]]] if d else []
for x in d[1:]:
    #if x - m[-1][0] < avg:
    if x - m[-1][0] < 0.5: #this is the threshold
        m[-1].append(x)
    else:
        m.append([x])
print(m)
from numpy import nan as NA
race='data/F1 Race.txt'
from lxml import etree
#not used
class Driver:
    '''Per-driver record of current, previous and historical sector times.'''

    def __init__(self, name,driverNum):
        self.name = name
        self.driverNum = driverNum
        # Created per instance: as class attributes these lists would be
        # shared by (and mutated through) every Driver.
        self.sectors=[NA,NA,NA]             # latest time for sectors 1-3
        self.prev_sectors=[NA,NA,NA]        # the time each latest value replaced
        self.history_sectors=[[],[],[]]     # every time recorded, per sector

    def update_sector(self,sectorNum,sectorTime):
        '''Record ``sectorTime`` for the 1-based sector ``sectorNum``.'''
        idx=sectorNum-1
        self.prev_sectors[idx]=self.sectors[idx]
        self.sectors[idx]=sectorTime
        self.history_sectors[idx].append(sectorTime)

# Quick demonstration of the class.
ham=Driver("hamilton","44")
ham.name
ham.update_sector(1,33.12)
print(ham.sectors,ham.prev_sectors,ham.history_sectors)
#not used
class Classification:
    '''Holder for a ``rank`` mapping, which starts out empty.'''

    def __init__(self):
        self.rank={}

ranks=Classification()
ranks
class Pits:
    '''Assemble pit-stop records from individual timing-screen cell updates.

    A record is built up per driver from updates to columns 2, 6 and 13 and
    committed to ``history`` (and to the driver's own pit list) once all of
    its fields are present.
    '''

    # Columns that contribute to a pit record.
    _PIT_COLUMNS=('2','6','13')
    # Keys a pending record holds once every contributing update has arrived.
    _COMPLETE_KEYS=frozenset(['out','driverNum','strtime','status','lap','count'])

    def __init__(self):
        self._items={}    # driver number -> pit record currently being assembled
        self.history=[]   # committed pit records, in commit order
        self._logger=[]   # raw updates that contributed to a record
        #self._driverNum=NA
        #self._lap=NA
        #self._strtime=None
        #self._count=NA

    def updateRacePits(self,raceObj,data):
        '''Fold one cell update into the pending pit record of its driver.

        raceObj supplies ``getDriverNumFromPos``, ``lap`` and ``drivers``;
        data is a cell-update dict with 'row', 'column', 'value', 'colour'
        and 'strtime'. Updates for other columns are ignored.
        '''
        #Need 2, 6 and 13 then commit
        #This doesn't work if there is a position change while someone is IN PIT?
        #SO: car has to go OUT before it can come back in?
        #Also need to catch if car IN PIT then goes to RETIRED
        driverNum=raceObj.getDriverNumFromPos(data)
        column=data['column']
        if column not in self._PIT_COLUMNS:
            return
        if driverNum not in self._items:
            self._items[driverNum]={'out':True}
        if column=='2' and data['colour']=='RED':
            self._logger.append(data)
            self._items[driverNum]['driverNum']=str(driverNum)+'_'+data['value']
            self._items[driverNum]['strtime']=data['strtime']
            #Note: driverNum should == int(data['value'])
        elif column=='6':
            if data['value']=='IN PIT':
                self._logger.append(data)
                self._items[driverNum]['status']=data['value']
            elif data['value']=='OUT':
                # An 'OUT' value re-arms the record so a later stop can be logged.
                self._items[driverNum]={'out':True}
        elif column=='13' and data['value']!='':
            self._logger.append(data)
            self._items[driverNum]['lap']=raceObj.lap
            self._items[driverNum]['count']=int(data['value'])
        # NOTE(review): the source had lost its indentation; running this commit
        # check after every contributing update is a reconstruction - confirm.
        pending=self._items[driverNum]
        if set(pending)==self._COMPLETE_KEYS and pending['out']:
            #print('dd')
            self.history.append(pending.copy())
            raceObj.drivers[driverNum].appendPit(pending.copy())
            # Block further commits for this driver until an 'OUT' is seen.
            self._items[driverNum]={'out':False}
class Purples:
    '''Track purple-flagged lap records assembled from cell updates.'''

    @staticmethod
    def _blank_record():
        '''Return a fresh record with every field set to the empty string.'''
        return {'driverNum':'',"name":'','laptime':'','classRank':'','lap':'','strtime':''}

    def __init__(self):
        # Most recently committed records.
        self.lap={}
        self.sector1={}
        self.sector2={}
        self.sector3={}
        # Records under construction; each gets its own dict.
        self._lap=self._blank_record()
        self._sector1=self._blank_record()
        self._sector2=self._blank_record()
        self._sector3=self._blank_record()
        self.history=[]   # committed lap records, in commit order

    def updateLapPurples(self,raceObj,data):
        '''Fold one cell update (column 2, 3 or 6) into the pending lap record.

        Column 2 supplies the driver number, timestamp and row; column 3 the
        name; column 6 the lap time, which also commits a copy of the record
        to ``lap`` and ``history``. raceObj supplies ``lap``.
        '''
        if data['column']=='2':
            self._lap['driverNum']=data['value']
            self._lap['strtime']=data['strtime']
            self._lap['classRank']=int(data['row'])
        elif data['column']=='3':
            self._lap['name']=data['value']
        elif data['column']=='6':
            self._lap['laptime']=data['value']
            self._lap['lap']=raceObj.lap-1
            # NOTE(review): the source had lost its indentation; committing only
            # on the laptime update is a reconstruction - confirm.
            self.lap=self._lap.copy()
            self.history.append(self.lap)

    def updateSectorPurples(self,raceObj,sector,data):
        '''Placeholder: sector records are not tracked yet.'''
        pass
class DriverHistory:
    '''Per-driver pit stops and lap-by-lap timing records.'''

    def __init__(self,driverNum=''):
        self.driverNum=driverNum
        self.name=''          # filled in by setname()
        self.pits=[]
        # Values collected for the lap currently being assembled.
        self._laptime=''
        self._timeofday=''
        self._sector1time=''
        self._sector2time=''
        self._sector3time=''
        self._gap=''
        self._interval=''
        self.lapdata=[]       # one dict per completed lap

    def setname(self,name):
        '''Store the driver's name.'''
        self.name=name

    def appendPit(self,data):
        '''Add a pit record to this driver's list.'''
        self.pits.append(data)

    def trackLaptime(self,data,raceObj=None):
        '''Fold one cell update into the pending lap and commit it when complete.

        data is a cell-update dict with 'row', 'column', 'value' and 'strtime'.
        raceObj supplies the current ``lap``; when omitted, the module-level
        ``raceObj`` is used, as callers that pass only ``data`` rely on it.
        '''
        # Row 1 has no car ahead, so its gap and interval default to '0'.
        if data['row']=='1':
            self._gap='0'
            self._interval='0'
        if data['column'] == '6':
            self._laptime=data['value']
            self._timeofday=data['strtime']
        elif data['column'] == '7': self._sector1time=data['value']
        elif data['column'] =='9': self._sector2time=data['value']
        elif data['column'] =='11': self._sector3time=data['value']
        elif data['column'] =='4': self._gap=data['value']
        elif data['column'] =='5': self._interval=data['value']
        if self._laptime != '' and self._sector1time!='' and self._sector2time!='' \
                and self._sector3time!='' and self._gap!='' and self._interval!='':
            if raceObj is None:
                # Resolved lazily so the global is only needed when a lap completes.
                raceObj=globals()['raceObj']
            # Only record a lap that has not been stored yet.
            if raceObj.lap > (len(self.lapdata)+1):
                #if len(self.lapdata)==0 or (self._timeofday != self.lapdata[-1]['timeofday'] ):
                self.lapdata.append({
                    'timeofday':self._timeofday,
                    'pos':int(data['row']),
                    'lap':raceObj.lap-1,
                    'laptime':self._laptime,
                    's1':self._sector1time,
                    's2':self._sector2time,
                    's3':self._sector3time,
                    'gap':self._gap,
                    'interval':self._interval})
            # NOTE(review): the source had lost its indentation; clearing the
            # pending values whether or not a lap was stored is a
            # reconstruction - confirm.
            self._laptime=''
            self._sector1time=''
            self._sector2time=''
            self._sector3time=''
            self._gap=''
            self._interval=''
class Race:
    '''Race state rebuilt from the stream of timing-screen cell updates.

    Each method takes ``data``, a cell-update dict with 'row', 'column',
    'value', 'colour' and 'strtime'.
    '''

    _PURPLE_COLUMNS=('2','3','6')
    _PIT_COLUMNS=('2','6','13')
    _LAPTIME_COLUMNS=('4','5','6','7','9','11')

    def __init__(self):
        self.lap=-1
        self.pos={} #Contains the driver number for the current classification position
        self.lapPurples=Purples()
        self.pits=Pits()
        self.drivers={}       # driver number -> DriverHistory
        self.driverNames={}   # driver name -> driver number
        self._namesSet=False

    def setDriverNumForPos(self,data):
        '''Record the update's value as the driver number for its row.'''
        self.pos[data['row']]=data['value']

    def getDriverNumFromPos(self,data):
        '''Return the driver number recorded for the update's row (KeyError if none).'''
        return self.pos[data['row']]

    def setupDriverDetails(self,data):
        '''Register drivers from column 2 (number) and column 3 (name) updates.'''
        if data['column']=='2' and data['value'].strip()!='' and data['value'] not in self.drivers:
            self.setDriverNumForPos(data)
            self.drivers[data['value']]=DriverHistory(data['value'])
        elif data['column']=='3' and data['value'].strip()!='':
            # The name is attached to whichever driver currently holds this row.
            driverNum=self.getDriverNumFromPos(data)
            self.drivers[driverNum].setname(data['value'])
            self.driverNames[data['value']]=driverNum

    def trackDriverNumForPos(self,data):
        '''Keep the row -> driver number map current from column 2 updates.'''
        if data['column']=='2':
            self.setDriverNumForPos(data)

    def trackLapCount(self,data):
        '''Take the lap count from a non-empty row 1, column 5 update.'''
        if data['row']=='1' and data['column']=='5' and data['value']!='':
            self.lap=int(data['value'])

    def trackPurples(self,raceObj,data):
        '''Forward PURPLE-coloured updates in columns 2, 3 and 6 to the purples tracker.'''
        if data['colour']=='PURPLE' and data['column'] in self._PURPLE_COLUMNS:
            self.lapPurples.updateLapPurples(raceObj,data)

    def trackPits(self,raceObj,data):
        '''Forward updates in columns 2, 6 and 13 to the pit tracker.'''
        if data['column'] in self._PIT_COLUMNS:
            self.pits.updateRacePits(raceObj,data)

    def trackLaptimes(self,raceObj,data):
        '''Forward timing updates to the history of the driver holding that row.'''
        if data['column'] in self._LAPTIME_COLUMNS:
            driverNum=self.getDriverNumFromPos(data)
            self.drivers[driverNum].trackLaptime(data)
class Weather:
    '''Weather readings grouped by the timestamp they were reported at.'''

    # Row of the weather update -> name the reading is stored under.
    _ROW_FIELDS={
        '1':'trackTemp',
        '2':'airTemp',
        '3':'rainfall',
        '4':'windSpeed',
        '5':'humidity',
        '6':'airpressure',
        '7':'windDir',
    }

    def __init__(self):
        self.stamps={}   # timestamp string -> {reading name: value}

    def setWeather(self,data):
        '''Store one reading; data has 'strtime', 'row' and 'value'.

        An entry for the timestamp is created even when the row is not one
        of the known readings, in which case nothing is stored in it.
        '''
        d=self.stamps.setdefault(data['strtime'],{})
        field=self._ROW_FIELDS.get(data['row'])
        if field is not None:
            d[field]=data['value']
# plf collects timing-screen cell updates and session-state messages in file
# order; wlf collects the weather readings.
plf=[]
wlf=[]
with open(race, 'r') as race_file:
    for el in race_file:
        r=etree.fromstring(el)
        # Merge the element's attributes with those of its first child.
        d=dict(r.attrib)
        d.update(r[0].attrib)
        # .get() is used for 'column' because not every element carries one.
        if d['identifier']=='101' and 'row' in d and d.get('column') in ['1','2','3','4','5','6','7','9','11','12','13']:
            dd={"value":d['value'], "colour":d['colour'],'row': d['row'],'column': d['column'],'strtime':d['timestamp']}
            #did= '_'.join([ d['row'],d['timestamp'].replace(':','_').replace('.','_') ])
            #dd={"value":d['value'], "colour":d['colour'],'id':did,'row': d['row'],'column': d['column'],'strtime':d['timestamp']}
            plf.append(dd)
        elif d['identifier']=='101' and 'sessionstate' in d:
            # Session-state messages are kept whole.
            plf.append(d)
        elif d['identifier']=='103' and d.get('column')=='1':
            dd={'strtime':d['timestamp'],'row': d['row'],"value":d['value']}
            wlf.append(dd)
plf[:3]
def checkSessionState(data):
    '''Return the 'sessionstate' value of ``data``, or None when it has none.'''
    return data.get('sessionstate')
# Replay the update stream through the race model.
# Before the session reports "started", updates only register drivers; between
# "started" and the following "inactive" they drive the live trackers.
raceObj=Race()
startedRunning=False
finishedRunning=False
#raceHistory=RaceHistory()
for x in plf:
    sessionstate=checkSessionState(x)
    if sessionstate is not None: print(sessionstate)
    if sessionstate=="started":
        startedRunning=True
    elif startedRunning and sessionstate=="inactive":
        finishedRunning=True
    if not startedRunning and sessionstate is None:
        raceObj.setupDriverDetails(x)
    elif startedRunning and not finishedRunning and sessionstate is None:
        # The lap count and the row -> driver map are refreshed first
        # because the trackers below read them.
        raceObj.trackLapCount(x)
        raceObj.trackDriverNumForPos(x)
        raceObj.trackPurples(raceObj,x)
        raceObj.trackPits(raceObj,x)
        raceObj.trackLaptimes(raceObj,x)
#raceHistory.lapPurples.history
# Load every collected weather reading into the Weather store.
weather=Weather()
for w in wlf:
    weather.setWeather(w)
# Readings stored for one specific timestamp (KeyError if the log has none at that instant).
weather.stamps['14:34:21.752']
# Last three and first five lap records for driver '1'.
raceObj.drivers['1'].lapdata[-3:]
raceObj.drivers['1'].lapdata[:5]
# First three committed purple lap records.
raceObj.lapPurples.history[:3]
# Pit records and name stored for driver '1'.
raceObj.drivers['1'].pits
raceObj.drivers['1'].name
# In this example, row 17 starts to update its higher-numbered columns before the lower ones.
# First three raw updates that contributed to pit records.
raceObj.pits._logger[:3]
# First three committed pit records.
raceObj.pits.history[:3]
# Current row -> driver number map.
raceObj.pos
import csv
from time import *
# Rewrite the tab-separated log as pipe-delimited rows of
# (epoch seconds, row[3], 'A', row[4] with ':' turned into '/').
# NOTE(review): the file names suggest Gource's custom log format - confirm.
# Both files are opened with newline='' as the csv module requires on
# Python 3, and the context manager closes them when the conversion ends.
with open('openurlgource.csv', newline='') as f, \
        open('openurlgource.txt', 'w', newline='') as out:
    reader = csv.reader(f, delimiter='\t')
    writer = csv.writer(out, delimiter='|')
    # Skip the header row; the default avoids StopIteration on an empty file.
    headerline = next(reader, None)
    for row in reader:
        # Rows whose fifth field is blank are dropped.
        if row[4].strip() !='':
            t=int(mktime(strptime(row[0]+" "+row[1], "%Y-%m-%d %H:%M:%S")))
            writer.writerow([t,row[3],'A',row[4].rstrip(':').replace(':','/')])