# All file paths used by this script are relative to this project directory.
setwd('~/mlb')
# library() stops immediately when a package is missing; require() would only
# warn and return FALSE, deferring the failure to the first lmer() or
# inner_join() call much later in the script.
library("lme4")
library("dplyr")
# Player ID strings; neither is referenced in the visible part of this script.
schmidt <- "schmj001"
pedro <- "martp001"

# Season year; used to build the per-season file names via sprintf().
yr <- 2004

# Columns treated as categorical: each is converted to a factor after loading.
facs <- c(
  "inning", "catcher", "outs_ct",
  "bat_home_id", "umpire", "year_id",
  "pitcher", "bats", "batter",
  "stadium", "role", "start_bases_cd"
)

# Read the per-season input table and coerce every column named in `facs`.
ifile <- sprintf("draFiles/dra.in.%d.csv", yr)
din.dra <- read.csv(file = ifile, stringsAsFactors = TRUE)
din.dra[facs] <- lapply(din.dra[facs], as.factor)

# lwts: woba_pts centred on woba_mean and divided by wobascale.
din.dra$lwts <- with(din.dra, (woba_pts - woba_mean) / wobascale)

# Log of temperature in Kelvin; the (temp - 32) * 5 / 9 conversion implies that
# `temp` is recorded in degrees Fahrenheit (NOTE(review): confirm the unit).
din.dra$temp_log <- with(din.dra, log((temp - 32) * 5 / 9 + 273))

# load() restores saved objects into the workspace; `df.csaa.full`, joined
# below, is presumably one of them (it is not defined anywhere else in view).
load(sprintf("csaaFiles/csaa.%d.model.output.R", yr))

# Keep only rows whose catcher has a matching row in df.csaa.full.
din.dra.csaa <- inner_join(x = din.dra, y = df.csaa.full, by = "catcher")
#value.model <- lmer(lwts ~ bats + inning*score_diff + start_bases_cd*outs_ct + csaa + temp + stadium + PF + role + fraa*bat_home_id + inning*bat_home_id + (1|batter) + (1|pitcher) + (1|catcher) + (1|umpire), data=din)
# Show a per-column summary of the joined table. print() is explicit so the
# summary is also displayed when the script is run through source(), which does
# not auto-print top-level values.
print(summary(din.dra.csaa))
# Console output of the summary captured from an earlier run. It is kept for
# reference and commented out, because as bare text it is not valid R and
# prevents the script from being parsed. One comment line per group of columns:
# inning catcher outs_ct temp fraa 6 :21340 kendj001: 5487 0:65083 Min. : 37.00 Min. :-88.561 4 :21211 martv001: 4865 1:62498 1st Qu.: 68.00 1st Qu.:-15.904 1 :21200 lopej001: 4848 2:60938 Median : 72.00 Median : -4.425 7 :21182 liebm001: 4789 Mean : 72.73 Mean : -3.842 5 :21103 schnb001: 4782 3rd Qu.: 79.00 3rd Qu.: 8.402 3 :21039 posaj001: 4757 Max. :100.00 Max. :114.087 (Other):61444 (Other) :158991
# bat_home_id umpire pitcher year_id 0:96098 westj901: 2923 hernl003: 1053 2004:188519 1:92421 meric901: 2908 buehm001: 1016 younl901: 2852 oswar001: 983 buckc901: 2789 johnr005: 964 mealj901: 2787 ponss001: 954 welkt901: 2771 muldm001: 952 (Other) :171489 (Other) :182597
# score_diff woba_mean batter game_id Min. :-22.00000 Min. :0.33 suzui001: 762 TEX200406240: 153 1st Qu.: -2.00000 1st Qu.:0.33 pierj002: 748 PHI200407020: 143 Median : 0.00000 Median :0.33 younm003: 739 MIN200408080: 142 Mean : -0.03723 Mean :0.33 robeb003: 734 MIL200404220: 134 3rd Qu.: 2.00000 3rd Qu.:0.33 iztuc001: 728 MIN200404060: 129 Max. : 22.00000 Max. :0.33 rollj001: 725 ANA200406130: 127 (Other) :184083 (Other) :187691
# stadium woba_pts bats role wobascale DEN02 : 6630 Min. :0.0000 L: 80791 FALSE: 65103 Min. :1.18 BAL12 : 6464 1st Qu.:0.0000 R:107728 TRUE :123416 1st Qu.:1.18 CHI12 : 6426 Median :0.0000 Median :1.18 SFO03 : 6418 Mean :0.3383 Mean :1.18 CLE08 : 6408 3rd Qu.:0.7370 3rd Qu.:1.18 PHI13 : 6404 Max. :1.9830 Max. :1.18 (Other):149769
# start_bases_cd lwts temp_log value 0 :105253 Min. :-0.27966 Min. :5.620 Min. :-0.069044 100 : 33848 1st Qu.:-0.27966 1st Qu.:5.680 1st Qu.:-0.021810 10 : 16631 Median :-0.27966 Median :5.688 Median : 0.005572 110 : 12808 Mean : 0.00703 Mean :5.689 Mean : 0.001248 101 : 5766 3rd Qu.: 0.34492 3rd Qu.:5.701 3rd Qu.: 0.021383 1 : 5680 Max. : 1.40085 Max. :5.739 Max. : 0.072239 (Other): 8533
# csaa Min. :-0.0258789 1st Qu.:-0.0081972 Median : 0.0020988 Mean : 0.0004828 3rd Qu.: 0.0080300 Max. : 0.0271872
# Full linear mixed model for lwts.
#   Fixed effects: inning x score_diff, start_bases_cd x outs_ct, csaa,
#                  temp_log, bats x stadium, role, fraa x bat_home_id,
#                  inning x bat_home_id.
#   Random intercepts: batter, pitcher, catcher, umpire.
value.dra.full <- lmer(
  lwts ~ inning*score_diff +
    start_bases_cd*outs_ct +
    csaa +
    temp_log +
    bats*stadium +
    role +
    fraa*bat_home_id +
    inning*bat_home_id +
    (1|batter) + (1|pitcher) + (1|catcher) + (1|umpire),
  data = din.dra.csaa
)
# Random-effect estimates for each grouping factor of the full model.
rr.dra <- ranef(value.dra.full)
# Console output captured from an earlier run of the statements above
# (presumably raised by the lmer() fit). Commented out because as bare text it
# is not valid R and prevents the script from being parsed:
# Warning message: : Some predictor variables are on very different scales: consider rescaling
# Reduced models fitted on the same data as value.dra.full. Every model keeps
# the pitcher random intercept and adds at most one term taken from the full
# model's formula (presumably to gauge each term's contribution on its own).
# The calls are deliberately left written out one by one: each fitted object
# stores its own call and formula, and these objects are saved below.

# Baseline: pitcher random intercept only.
value.dra.0 <- lmer(lwts ~ (1|pitcher), data=din.dra.csaa)
# One fixed-effect term (or interaction) at a time.
value.dra.inn.scorediff <- lmer(lwts ~ (1|pitcher) + inning*score_diff, data=din.dra.csaa)
value.dra.bases.outs <- lmer(lwts ~ (1|pitcher) + start_bases_cd*outs_ct, data=din.dra.csaa)
value.dra.csaa <- lmer(lwts ~ (1|pitcher) + csaa, data=din.dra.csaa)
value.dra.temp_log <- lmer(lwts ~ (1|pitcher) + temp_log, data=din.dra.csaa)
value.dra.bats.stadium <- lmer(lwts ~ (1|pitcher) + bats*stadium, data=din.dra.csaa)
value.dra.role <- lmer(lwts ~ (1|pitcher) + role, data=din.dra.csaa)
value.dra.fraa.bat_home_id <- lmer(lwts ~ (1|pitcher) + fraa*bat_home_id, data=din.dra.csaa)
value.dra.inn.bat_home_id <- lmer(lwts ~ (1|pitcher) + inning*bat_home_id, data=din.dra.csaa)
# One additional random intercept at a time.
value.dra.batter <- lmer(lwts ~ (1|pitcher) + (1|batter), data=din.dra.csaa)
value.dra.catcher <- lmer(lwts ~ (1|pitcher) + (1|catcher), data=din.dra.csaa)
value.dra.umpire <- lmer(lwts ~ (1|pitcher) + (1|umpire), data=din.dra.csaa)
# Store the full model and all reduced models together in one file whose name
# carries the season year. save() writes R's serialized data format, so the
# file must be read back with load() despite its ".R" extension.
save(
  list = c(
    "value.dra.full",
    "value.dra.0",
    "value.dra.inn.scorediff",
    "value.dra.bases.outs",
    "value.dra.csaa",
    "value.dra.temp_log",
    "value.dra.bats.stadium",
    "value.dra.role",
    "value.dra.fraa.bat_home_id",
    "value.dra.inn.bat_home_id",
    "value.dra.batter",
    "value.dra.catcher",
    "value.dra.umpire"
  ),
  file = sprintf("dra.%d.model.output.R", yr)
)