#!/usr/bin/env python
# coding: utf-8

# In[ ]:

import os

# Relative output paths used further down (dra.in.<year>.csv) are resolved
# against this directory.  Presumably it is also where the project-local
# module imported below lives -- TODO confirm.
os.chdir('%s/mlb' % os.environ['HOME'])
import retrosheet_sql_tools


# In[ ]:

# SQL shared by both getData_* variants.  Placeholders, in order:
#   %s  optional extra column clause appended to the events sub-select
#   %d  first year_id (inclusive)
#   %d  last year_id (inclusive)
#   %d  row limit applied to the events sub-select (before the joins)
_QUERY_TEMPLATE = (
    'select a.*, b.park_id as stadium, '
    'b.temp_park_ct as temp , b.base4_ump_id as umpire, '
    'c.woba as woba_mean, c.wobascale from '
    '(select game_id, year_id, pit_id as pitcher, '
    'bat_id as batter, bat_hand_cd as bats, bat_home_id, '
    'inn_ct as inning, outs_ct, '
    "concat(RUN1_FLD_CD>'', RUN2_FLD_CD>'', RUN3_FLD_CD>'') "
    'as start_bases_cd, '
    'start_bat_score_ct-start_fld_score_ct as score_diff, '
    'pit_start_fl as role, woba_pts, pos2_fld_id as catcher%s '
    'from retrosheet_backup.events where '
    'year_id>=%d and year_id<=%d and playoff_flag=0 and '
    'woba_pts>=0 limit %d) a inner join retrosheet_backup.games b on '
    'a.game_id=b.game_id inner join mlb.fgGuts c on '
    'a.year_id=c.yearid'
)


def _buildQuery(minyr, maxyr, ilim, with_fraa):
    """Return the event query text, optionally selecting br_fraa as fraa."""
    extra_column = ', br_fraa as fraa' if with_fraa else ''
    return _QUERY_TEMPLATE % (extra_column, minyr, maxyr, ilim)


def getData_withFraa(m, minyr=1999, maxyr=1999, ilim=999999999):
    """Query per-event rows, including the ``fraa`` column.

    Selects rows from retrosheet_backup.events with playoff_flag=0 and
    woba_pts>=0 for minyr <= year_id <= maxyr, joined to
    retrosheet_backup.games on game_id and to mlb.fgGuts on year.

    Args:
        m: object providing ``sqlQueryToArray(query_string)``.
        minyr: first year_id to include (formatted with %d).
        maxyr: last year_id to include (formatted with %d).
        ilim: LIMIT applied to the events sub-select.

    Returns:
        Whatever ``m.sqlQueryToArray`` returns for the query.
    """
    q = _buildQuery(minyr, maxyr, ilim, with_fraa=True)
    data = m.sqlQueryToArray(q)
    return data


# In[ ]:

def getData_noFraa(m, minyr=1999, maxyr=1999, ilim=999999999):
    """Query per-event rows without the ``fraa`` column.

    Same query as ``getData_withFraa`` except that ``br_fraa as fraa`` is
    not selected.

    Args:
        m: object providing ``sqlQueryToArray(query_string)``.
        minyr: first year_id to include (formatted with %d).
        maxyr: last year_id to include (formatted with %d).
        ilim: LIMIT applied to the events sub-select.

    Returns:
        Whatever ``m.sqlQueryToArray`` returns for the query.
    """
    # A leftover debug "print q; sys.exit()" used to sit here.  ``sys`` was
    # never imported, so it raised NameError and the query was never run.
    q = _buildQuery(minyr, maxyr, ilim, with_fraa=False)
    data = m.sqlQueryToArray(q)
    return data


# In[ ]:

def arrayToCsv(data, ofile=None):
    """Write ``data`` to ``ofile`` as comma-separated text with a header row.

    The header is the field names from ``data.dtype.fields``; each following
    line is ``str()`` of every field of one row, in the same column order.
    Values are written verbatim: nothing is quoted or escaped.

    Args:
        data: iterable of rows indexable by field name that exposes
            ``dtype.fields`` (presumably a numpy structured array --
            TODO confirm against ``sqlQueryToArray``).
        ofile: path of the file to create or overwrite.

    Raises:
        ValueError: if ``ofile`` is None.
    """
    if ofile is None:
        raise ValueError('arrayToCsv requires an output path (ofile)')

    # list() keeps this working where keys() returns a view, not a list.
    # NOTE(review): column order follows the mapping's key order; if the
    # declared field order is wanted, ``data.dtype.names`` may be the better
    # source -- confirm before changing, since it can reorder the columns.
    ks = list(data.dtype.fields.keys())

    # The context manager closes the file even if a row fails to serialise.
    with open(ofile, 'w') as ofp:
        ofp.write(','.join(ks) + '\n')
        for d in data:
            ofp.write(','.join(str(d[k]) for k in ks) + '\n')


# In[ ]:

fraa = True
# NOTE(review): ``mlb`` is not imported anywhere in the visible source (only
# ``retrosheet_sql_tools`` is), so this line raises NameError unless the name
# is provided some other way -- confirm the intended import.
m = mlb.mlb()
yrs = range(1997, 2004+1)

# One query and one output file (dra.in.<year>.csv) per season.
for yr in yrs:
    # Single-argument form prints the same text under Python 2 and 3.
    print('doing yr... %d' % yr)
    if fraa:
        data = getData_withFraa(m, minyr=yr, maxyr=yr)
    else:
        data = getData_noFraa(m, minyr=yr, maxyr=yr)
    ofile = 'dra.in.%d.csv' % yr
    arrayToCsv(data, ofile=ofile)