import os
# Switch the working directory to $HOME/mlb; relative paths opened later in
# this file (e.g. the 'dra.in.<year>.csv' outputs) resolve against it.
# Raises KeyError if the HOME environment variable is not set.
# NOTE(review): this runs before the import below -- presumably deliberate so
# the module can be located; confirm before reordering.
os.chdir('%s/mlb' % os.environ['HOME'])
import retrosheet_sql_tools
def getData_withFraa(m, minyr=1999, maxyr=1999, ilim=999999999):
    """Query per-event rows (including ``br_fraa``) for a range of seasons.

    The inner select reads ``retrosheet_backup.events`` restricted to
    ``minyr <= year_id <= maxyr``, ``playoff_flag=0`` and ``woba_pts>=0``,
    capped at ``ilim`` rows.  It is then inner-joined to
    ``retrosheet_backup.games`` on ``game_id`` and to ``mlb.fgGuts`` on the
    year.

    Args:
        m: Object exposing ``sqlQueryToArray(query)``; the query text is
            handed to it unchanged.
        minyr: First season to include (formatted with ``%d``).
        maxyr: Last season to include (formatted with ``%d``).
        ilim: Row limit applied to the events sub-select.

    Returns:
        Whatever ``m.sqlQueryToArray`` returns for the query.
    """
    # Columns selected from the events table, in output order.
    event_columns = ', '.join([
        'game_id',
        'year_id',
        'pit_id as pitcher',
        'bat_id as batter',
        'bat_hand_cd as bats',
        'bat_home_id',
        'inn_ct as inning',
        'outs_ct',
        # Concatenation of the three "runner field is non-empty" comparisons.
        "concat(RUN1_FLD_CD>'', RUN2_FLD_CD>'', RUN3_FLD_CD>'') "
        "as start_bases_cd",
        'start_bat_score_ct-start_fld_score_ct as score_diff',
        'pit_start_fl as role',
        'woba_pts',
        'pos2_fld_id as catcher',
        'br_fraa as fraa',
    ])

    events_subquery = (
        '(select %s from retrosheet_backup.events where '
        'year_id>=%d and year_id<=%d and playoff_flag=0 and '
        'woba_pts>=0 limit %d) a'
    ) % (event_columns, minyr, maxyr, ilim)

    outer_select = (
        'select a.*, b.park_id as stadium, '
        'b.temp_park_ct as temp , b.base4_ump_id as umpire, '
        'c.woba as woba_mean, c.wobascale from '
    )
    joins = (
        ' inner join retrosheet_backup.games b on a.game_id=b.game_id'
        ' inner join mlb.fgGuts c on a.year_id=c.yearid'
    )

    return m.sqlQueryToArray(outer_select + events_subquery + joins)
def getData_noFraa(m, minyr=1999, maxyr=1999, ilim=999999999):
    """Query per-event rows (without ``br_fraa``) for a range of seasons.

    The inner select reads ``retrosheet_backup.events`` restricted to
    ``minyr <= year_id <= maxyr``, ``playoff_flag=0`` and ``woba_pts>=0``,
    capped at ``ilim`` rows.  It is then inner-joined to
    ``retrosheet_backup.games`` on ``game_id`` and to ``mlb.fgGuts`` on the
    year.

    Args:
        m: Object exposing ``sqlQueryToArray(query)``; the query text is
            handed to it unchanged.
        minyr: First season to include (formatted with ``%d``).
        maxyr: Last season to include (formatted with ``%d``).
        ilim: Row limit applied to the events sub-select.

    Returns:
        Whatever ``m.sqlQueryToArray`` returns for the query.
    """
    # Adjacent literals are joined at compile time, so the single ``%``
    # below formats the whole statement.  All placeholders are ``%d``, which
    # only accepts numbers.
    q = (
        'select a.*, b.park_id as stadium, b.temp_park_ct as temp , '
        'b.base4_ump_id as umpire, c.woba as woba_mean, c.wobascale from '
        '(select game_id, year_id, pit_id as pitcher, bat_id as batter, '
        'bat_hand_cd as bats, bat_home_id, inn_ct as inning, outs_ct, '
        # Concatenation of the three "runner field is non-empty" comparisons.
        "concat(RUN1_FLD_CD>'', RUN2_FLD_CD>'', RUN3_FLD_CD>'') "
        'as start_bases_cd, '
        'start_bat_score_ct-start_fld_score_ct as score_diff, '
        'pit_start_fl as role, '
        'woba_pts, pos2_fld_id as catcher '
        'from retrosheet_backup.events where '
        'year_id>=%d and year_id<=%d and playoff_flag=0 and '
        'woba_pts>=0 limit %d) a '
        'inner join retrosheet_backup.games b on a.game_id=b.game_id '
        'inner join mlb.fgGuts c on a.year_id=c.yearid'
    ) % (minyr, maxyr, ilim)
    return m.sqlQueryToArray(q)
def arrayToCsv(data, ofile=None):
    """Write a structured array to ``ofile`` as comma-separated text.

    The first line holds the field names; each following line holds one
    record, with every value rendered through ``str()``.  Values are joined
    with bare commas and are NOT quoted or escaped, so a value containing a
    comma or newline will produce a malformed row.

    Args:
        data: Iterable array whose ``dtype.names`` lists its fields and whose
            records support lookup by field name (e.g. a NumPy structured
            array).
        ofile: Path of the file to create or overwrite.  Required despite
            the ``None`` default.

    Raises:
        ValueError: If ``ofile`` is None or ``data`` has no named fields.
    """
    if ofile is None:
        raise ValueError('arrayToCsv requires an output path (ofile)')

    # dtype.names is the ordered tuple of field names; dtype.fields is a
    # mapping whose key view cannot be sliced and whose order is not the
    # documented way to obtain the column order.
    names = data.dtype.names
    if not names:
        raise ValueError('arrayToCsv requires an array with named fields')

    # Validate before opening so a bad call does not leave an empty file
    # behind; the context manager closes the handle even if a write fails.
    with open(ofile, 'w') as ofp:
        ofp.write(','.join(names) + '\n')
        for record in data:
            ofp.write(','.join([str(record[name]) for name in names]) + '\n')
# Driver: for each season, run one of the event queries and write the result
# to 'dra.in.<year>.csv'.

# Selects the query variant used in the loop below
# (True -> getData_withFraa, False -> getData_noFraa).
fraa = True
# NOTE(review): no `import mlb` is visible in this file, so as written this
# line would raise NameError -- confirm where `mlb` is meant to come from.
m = mlb.mlb()
# Seasons 1997 through 2004 inclusive (the +1 offsets range()'s exclusive end).
yrs = range(1997, 2004+1)
for yr in yrs:
    print 'doing yr...', yr
    # One season per query: minyr and maxyr are both set to the current year.
    if fraa:
        data = getData_withFraa(m, minyr=yr, maxyr=yr)
    else:
        data = getData_noFraa(m, minyr=yr, maxyr=yr)
    # Relative path, so the file is written to the current working directory
    # (changed by the os.chdir call at the top of the file).
    ofile = 'dra.in.%d.csv' % yr
    arrayToCsv(data, ofile=ofile)