"""Rank tennis players from a tab-separated match file.

Two rankings are computed and compared:

* a "points" ranking, taken from the largest ``WPts`` / ``LPts`` value seen
  for each player in the file, and
* an "eigen" ranking, obtained by repeatedly multiplying a vector of ones by
  the win matrix (power iteration) and normalising.

The numeric helpers are importable on their own; file reading, printing and
plotting only happen when the file is run as a script / notebook.
"""
import csv
import sys

import numpy as np

baseFileName = '2010.tab'  # dataset

FEATURED_PLAYER = 'Henin J.'  # player whose scores/ranks are printed in detail
POWER_ITERATIONS = 10         # number of R.dot(r) steps used for the eigen score
FIT_DEGREE = 2                # degree of the polynomial fitted to (points, score)
FIT_X_MAX = 8000              # the fitted curve is drawn for x in [0, FIT_X_MAX)


def getindex(str, a):
    """Return the list of positions ``i`` in *a* for which ``a[i] == str``.

    The first parameter keeps its historical name (it shadows the builtin
    only inside this function) so existing callers are unaffected.
    """
    return [i for (i, val) in enumerate(a) if val == str]


def _open_table(path):
    """Open *path* the way the ``csv`` module expects on this interpreter."""
    if sys.version_info[0] < 3:
        return open(path, 'rb')  # Python 2 csv wants a binary file object
    return open(path, newline='')  # Python 3 csv wants text with newline=''


def load_matches(path):
    """Read the tab-separated file at *path*.

    Returns a list with one plain ``dict`` per data row, keyed by the column
    names found in the header line.
    """
    with _open_table(path) as f:
        reader = csv.DictReader(f, dialect='excel', delimiter='\t')
        return [dict(row) for row in reader]


def index_players(players):
    """Map each player name in *players* to its position in that sequence."""
    names = np.asarray(players).tolist()  # plain Python strings as dict keys
    return {name: i for i, name in enumerate(names)}


def build_results(matches, players):
    """Build the win matrix and the per-player points vector.

    Parameters
    ----------
    matches : iterable of dict
        Rows with at least the keys ``'Winner'``, ``'Loser'``, ``'WPts'``
        and ``'LPts'``.  A points value of ``'N/A'`` is skipped.
    players : sequence of str
        Unique player names; position in this sequence is the matrix index.

    Returns
    -------
    (R, pts)
        ``R`` is ``(n, n)`` with ``R[i, j] == 1`` when player ``i`` beat
        player ``j`` in at least one row.  ``pts`` is ``(n, 1)`` holding the
        largest points value seen for each player (0 if none was seen).

    Raises
    ------
    KeyError
        If a row names a player that is not in *players*.
    ValueError
        If a points field is neither ``'N/A'`` nor an integer literal.
    """
    index = index_players(players)
    n = len(index)
    R = np.zeros((n, n))
    pts = np.zeros((n, 1))
    for match in matches:
        wind = index[match['Winner']]
        lind = index[match['Loser']]
        # Assign 1 to winner row / loser column.
        R[wind, lind] = 1
        for who, column in ((wind, 'WPts'), (lind, 'LPts')):
            if match[column] != 'N/A':
                pts[who, 0] = max(pts[who, 0], int(match[column]))
    return R, pts


def eigen_scores(R, iterations=POWER_ITERATIONS):
    """Run *iterations* steps of power iteration on *R* starting from ones.

    Each step replaces ``r`` by ``R.dot(r)`` scaled to unit Euclidean norm.
    If a step yields the zero vector the iteration stops and the last
    non-zero vector is returned, instead of dividing by zero and filling the
    result with NaN.

    Returns an ``(n, 1)`` array.
    """
    r = np.ones((R.shape[0], 1))
    for _ in range(iterations):
        nxt = np.dot(R, r)
        norm = np.linalg.norm(nxt)
        if norm == 0:
            break  # nothing left to normalise; keep the last valid scores
        r = nxt / norm
    return r


def rank_order(scores):
    """Return player indices sorted by ``scores[:, 0]``, best (largest) first."""
    return scores[:, 0].argsort()[::-1]


def rank_positions(order):
    """Invert a ranking: ``result[i]`` is the 1-based rank of player ``i``.

    *order* is a permutation of ``range(n)`` as returned by ``rank_order``.
    """
    order = np.asarray(order, dtype=int)
    positions = np.empty(len(order), dtype=int)
    positions[order] = np.arange(1, len(order) + 1)
    return positions


if __name__ == "__main__":
    # Equivalent of the ``%matplotlib inline`` notebook magic, written so the
    # file also parses and runs under a plain Python interpreter.
    try:
        get_ipython().run_line_magic('matplotlib', 'inline')
    except NameError:
        pass  # not running under IPython: there is no inline backend to select

    # Plotting libraries are imported only when run as a script / notebook so
    # the helpers above stay importable where matplotlib is not installed.
    # NOTE(review): ``plt`` is bound to the top-level ``matplotlib`` package
    # (not ``matplotlib.pyplot``) and is not used below.
    import matplotlib as plt
    import pylab as pl

    base = load_matches(baseFileName)
    winners = [row['Winner'] for row in base]
    losers = [row['Loser'] for row in base]
    players = np.unique(winners + losers)
    n = len(players)
    print('number of players:  {}'.format(n))

    # Row numbers of the matches lost by 'Williams V.' (exploratory lookup).
    williams_losses = getindex('Williams V.', losers)

    R, pts = build_results(base, players)
    pl.imshow(R, interpolation='none')

    r = eigen_scores(R, POWER_ITERATIONS)
    eigenrank = rank_order(r)
    wtp_rank = rank_order(pts)
    eigen_position = rank_positions(eigenrank)
    wtp_position = rank_positions(wtp_rank)

    print(players[wtp_rank[:20]])
    print(players[eigenrank[:20]])

    featured = index_players(players)[FEATURED_PLAYER]
    print('eigen score  {}'.format(r[[featured], 0]))
    print('WTP pts  {}'.format(pts[[featured], 0]))
    print('eigen rank  {}'.format(eigen_position[featured]))
    print('WTP rank {}'.format(wtp_position[featured]))

    # NOTE(review): no pl.figure()/pl.show() calls, as in the original; this
    # presumably relies on the notebook's inline backend - confirm.
    pl.plot(pts[:, 0], r[:, 0], '.')
    fit_x = np.arange(FIT_X_MAX)
    fit_coefficients = np.polyfit(pts[:, 0], r[:, 0], FIT_DEGREE)
    pl.plot(fit_x, np.polyval(fit_coefficients, fit_x))

    for i in wtp_rank:
        print('{} {} {} {} {}'.format(
            players[i], pts[i, 0], r[i, 0], wtp_position[i], eigen_position[i]))