import os
# Work from $HOME/mlb: the relative output path used later in this file
# ('csaa.in.<year>.csv') is resolved against this directory.
# NOTE(review): the chdir runs before the import below, so presumably
# retrosheet_sql_tools.py is also expected to live there -- confirm.
os.chdir('%s/mlb' % os.environ['HOME'])
import retrosheet_sql_tools
# retrosheet_sql_tools can be installed from
# https://github.com/wellsoliver/py-retrosheet
# (scripts/retrosheet_sql_tools.py)
# Database helper handed to getData(), which calls its sqlQueryToArray().
rs = retrosheet_sql_tools.retrosheet_sql()
# retrosheet_sql_tools has lots of imports that aren't relevant here;
# it should be possible to comment out json, ephem, pytz and tzwhere in that
# module rather than installing them.
def bsCodeToCs(bs_code):
    """Return the ball-strike counts at which pitches were taken.

    Walks a pitch-sequence string one character at a time, tracking the
    running count, and records the count *before* every called ball ('B')
    and every called strike ('C').  Pitches the batter swung at are not
    recorded, but they still advance the count.

    Args:
        bs_code: pitch-sequence string, e.g. 'CBFFBX'.  None or '' is
            treated as an empty sequence.

    Returns:
        dict with exactly two keys:
            0 -> list of [balls, strikes] counts at which a 'B' occurred,
            1 -> list of [balls, strikes] counts at which a 'C' occurred.
        Each list is in pitch order.
    """
    # Characters that are skipped outright and never touch the count.
    # (In the Retrosheet pitch codes these are play annotations and pickoff
    # throws rather than pitches to the batter.)
    veto = frozenset('+*.123>')
    # Codes that add a ball.
    balls = frozenset('BIVP')
    # Codes that add a strike but can never produce strike three: the
    # strike count is capped at two.
    strikes_noout = frozenset('FR')
    # Codes that add a strike with no cap.
    strikes_maybeout = frozenset('CKLMOQST')

    ans = {0: [], 1: []}
    n_balls = 0
    n_strikes = 0
    # `or ''` lets a missing (None) sequence behave like an empty one
    # instead of raising TypeError.
    for ch in bs_code or '':
        if ch in veto:
            continue

        # csaa only cares about pitches that were not swung at, i.e. a
        # called strike or a called ball; record the count they came on.
        if ch == 'C':
            ans[1].append([n_balls, n_strikes])
        elif ch == 'B':
            ans[0].append([n_balls, n_strikes])

        # Every pitch, recorded or not, still has to advance the count.
        # Any code in none of the tables leaves the count unchanged.
        if ch in balls:
            n_balls += 1
        elif ch in strikes_noout:
            n_strikes = min(n_strikes + 1, 2)
        elif ch in strikes_maybeout:
            n_strikes += 1
    return ans
def getData(m, minyr=1999, maxyr=1999, ilim=999999999):
    """Fetch regular-season event rows joined to their home-plate umpire.

    Builds one SQL query against retrosheet_backup.events (filtered to
    minyr <= year_id <= maxyr and playoff_flag=0) joined to
    retrosheet_backup.games on game_id, echoes the query to stdout, and
    runs it through m.sqlQueryToArray().

    Selected columns: game_id, pitcher, throws, batter, stands,
    home_batting, PITCH_SEQ_TX, cnt (the constant '00', a placeholder),
    catcher, umpire.

    Args:
        m: object exposing sqlQueryToArray(query).
        minyr: first season to include (inclusive); must be an integer.
        maxyr: last season to include (inclusive); must be an integer.
        ilim: LIMIT applied to the events subquery; must be an integer.

    Returns:
        Whatever m.sqlQueryToArray() returns for the query.
    """
    # All three substitutions use %d, so only numbers can be interpolated.
    # NOTE(review): the LIMIT has no ORDER BY, so which rows a small ilim
    # keeps is up to the database.
    q = (
        "select a.*, base4_ump_id as umpire from ("
        "select game_id, pit_id as pitcher, pit_hand_cd as throws, "
        "bat_id as batter, bat_hand_cd as stands, "
        "bat_home_id as home_batting, PITCH_SEQ_TX, '00' as cnt, "
        "pos2_fld_id as catcher "
        "from retrosheet_backup.events "
        "where year_id>=%d and year_id<=%d and playoff_flag=0 "
        "limit %d) a "
        "inner join retrosheet_backup.games b on a.game_id=b.game_id "
    ) % (minyr, maxyr, ilim)
    # Parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print(q + ';')
    return m.sqlQueryToArray(q)
def arrayToCsv(data, ofile=None, n2print=30000):
    """Write one CSV row per taken pitch found in `data`.

    For every record, the 'PITCH_SEQ_TX' field is decoded with
    bsCodeToCs(); each called ball / called strike it reports becomes one
    output row.  The first column, 'cs', is the bsCodeToCs() key (0 for a
    'B' pitch, 1 for a 'C' pitch); the remaining columns are the record's
    fields in the order given by data.dtype.names, with 'cnt' set to the
    count the pitch came on, written as balls then strikes (e.g. '12').

    Side effect: the 'cnt' field of each record in `data` is overwritten
    in place while rows are generated.

    Args:
        data: structured array whose dtype includes 'PITCH_SEQ_TX' and
            'cnt' fields.
        ofile: path of the CSV file to create (overwritten if present).
        n2print: print a progress line for the rows generated from every
            n2print-th record; 0 or None disables progress output.
            Records that yield no rows print nothing.

    Raises:
        ValueError: if ofile is None.
    """
    if ofile is None:
        raise ValueError('arrayToCsv requires an output file path (ofile)')

    # dtype.names is the ordered tuple of field names, so header and rows
    # follow the declared column order and are sliceable on Python 3 too.
    ks = list(data.dtype.names)
    nrec = len(data)

    # `with` guarantees the file is closed even if a record fails midway.
    with open(ofile, 'w') as ofp:
        ofp.write('cs,%s\n' % ','.join(ks))
        for idata, d in enumerate(data):
            ts = bsCodeToCs(d['PITCH_SEQ_TX'])
            for ics in ts:
                for t in ts[ics]:
                    # [balls, strikes] -> two-character count string.
                    d['cnt'] = ''.join([str(x) for x in t])
                    if n2print and idata % n2print == 0:
                        print('%s %s %s %s %s'
                              % (idata, nrec, ics, d, d['cnt']))
                    ofp.write('%d,%s\n'
                              % (ics, ','.join([str(d[k]) for k in ks])))
# Driver: build one csaa input CSV per season.  The range currently covers
# the single season 2004; widen it to process more years.
yrs = range(2004, 2004+1)
for yr in yrs:
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print('doing yr... %s' % yr)
    data = getData(rs, minyr=yr, maxyr=yr)
    # Relative path: written into the current working directory.
    ofile = 'csaa.in.%d.csv' % yr
    arrayToCsv(data, ofile=ofile)
# Sample output from the run above:
#
# doing yr... 2004
# select a.*, base4_ump_id as umpire from (select game_id, pit_id as pitcher, pit_hand_cd as throws, bat_id as batter, bat_hand_cd as stands, bat_home_id as home_batting, PITCH_SEQ_TX, '00' as cnt, pos2_fld_id as catcher from retrosheet_backup.events where year_id>=2004 and year_id<=2004 and playoff_flag=0 limit 999999999) a inner join retrosheet_backup.games b on a.game_id=b.game_id ;
# 0 194194 0 ('ANA200404130', 'escok001', 'R', 'suzui001', 'L', 0, 'CBFFBX', '01', 'molib001', 'mealj901') 01
# 0 194194 0 ('ANA200404130', 'escok001', 'R', 'suzui001', 'L', 0, 'CBFFBX', '12', 'molib001', 'mealj901') 12
# 0 194194 1 ('ANA200404130', 'escok001', 'R', 'suzui001', 'L', 0, 'CBFFBX', '00', 'molib001', 'mealj901') 00
# 30000 194194 0 ('BOS200407221', 'alvaa001', 'L', 'bigbl001', 'L', 0, '1F>FB111FX', '02', 'varij001', 'guccc901') 02
# 90000 194194 0 ('KCA200408300', 'kinnm002', 'R', 'higgb001', 'L', 0, 'FBBBFS', '01', 'buckj001', 'darlg901') 01
# 90000 194194 0 ('KCA200408300', 'kinnm002', 'R', 'higgb001', 'L', 0, 'FBBBFS', '11', 'buckj001', 'darlg901') 11
# 90000 194194 0 ('KCA200408300', 'kinnm002', 'R', 'higgb001', 'L', 0, 'FBBBFS', '21', 'buckj001', 'darlg901') 21
# 150000 194194 0 ('SDN200404290', 'bentc001', 'L', 'burrs001', 'L', 1, '*BCBX', '00', 'schnb001', 'fostm901') 00
# 150000 194194 0 ('SDN200404290', 'bentc001', 'L', 'burrs001', 'L', 1, '*BCBX', '11', 'schnb001', 'fostm901') 11
# 150000 194194 1 ('SDN200404290', 'bentc001', 'L', 'burrs001', 'L', 1, '*BCBX', '10', 'schnb001', 'fostm901') 10
# 180000 194194 0 ('TBA200408310', 'chenb001', 'L', 'lugoj001', 'R', 1, 'BFBBB', '00', 'lopej001', 'mcclt901') 00
# 180000 194194 0 ('TBA200408310', 'chenb001', 'L', 'lugoj001', 'R', 1, 'BFBBB', '11', 'lopej001', 'mcclt901') 11
# 180000 194194 0 ('TBA200408310', 'chenb001', 'L', 'lugoj001', 'R', 1, 'BFBBB', '21', 'lopej001', 'mcclt901') 21
# 180000 194194 0 ('TBA200408310', 'chenb001', 'L', 'lugoj001', 'R', 1, 'BFBBB', '31', 'lopej001', 'mcclt901') 31