# Configuration and input loading for the per-season catcher "csaa" models
# (csaa presumably stands for called strikes above average -- confirm).

# NOTE(review): setwd() ties the script to one directory layout; it is kept
# because every path below is relative to this project root.
setwd('~/mlb')

# library() stops with an error when lme4 is not installed. require() would
# only warn and return FALSE, deferring the failure to the first glmer() call.
library("lme4")

# Season to process; it selects the input file and names the output file.
yr <- 2004

# Columns that must be treated as categorical. read.csv() parses numeric-coded
# columns (e.g. cnt, home_batting) as numbers, so they are coerced explicitly.
facs <- c("cnt", "stands", "umpire", "pitcher", "catcher", "batter", "throws", "home_batting")

model.file <- sprintf("csaa.%d.model.output.R", yr)
ifile <- sprintf("csaaFiles/csaa.in.%d.csv", yr)

din <- read.csv(ifile, stringsAsFactors = TRUE)
din[facs] <- lapply(din[facs], as.factor)

# Quick sanity check of level counts and the response distribution.
summary(din)
# Output of summary(din) for the 2004 input (pasted console output, kept for
# reference):
#
#        cs             cnt            catcher         stands
#  Min.   :0.000   0      :138020   kendj001: 10815   L:172174
#  1st Qu.:0.000   1      : 47922   lopej001: 10569   R:217321
#  Median :0.000   10     : 46333   martv001: 10201
#  Mean   :0.315   11     : 34800   schnb001: 10027
#  3rd Qu.:1.000   12     : 26568   barrm003:  9923
#  Max.   :1.000   2      : 20802   posaj001:  9506
#                  (Other): 75050   (Other) :328454
#
#       umpire           pitcher        PITCH_SEQ_TX    home_batting
#  meric901:  5913   hernl003:  2274   BX     : 11479   0:197425
#  younl901:  5875   zitob001:  2041   CBX    :  9540   1:192070
#  mealj901:  5870   estes001:  2024   CX     :  9354
#  westj901:  5843   rogek001:  2013   BBBB   :  8980
#  buckc901:  5834   webbb001:  1983   BBX    :  7910
#  schrp901:  5828   ortir001:  1978   BCX    :  6528
#  (Other) :354332   (Other) :377182   (Other):335704
#
#       batter             game_id         throws
#  abreb001:  2017   PHI200407020:   318   L:107980
#  wilkb002:  1842   MIL200404220:   310   R:281515
#  podss001:  1802   OAK200405070:   306
#  jimed001:  1801   SEA200404190:   284
#  kendj001:  1789   MIN200408080:   280
#  blakc001:  1776   NYN200409110:   276
#  (Other) :378468   (Other)     :387721
# Full model: probit mixed-effects regression of the 0/1 response `cs` on
# pitcher-hand x batter-side, home_batting and the count, with random
# intercepts for catcher, pitcher, umpire and batter.
# nAGQ = 0 selects lme4's faster, less exact estimation mode (see the glmer
# documentation for the trade-off).
value.csaa.full <- glmer(
  cs ~ throws * stands +
    home_batting +
    cnt +
    (1 | catcher) +
    (1 | pitcher) +
    (1 | umpire) +
    (1 | batter),
  data = din,
  family = binomial(link = 'probit'),
  nAGQ = 0
)

# Baseline model: a catcher random intercept only, with no other adjustments.
value.csaa.0 <- glmer(
  cs ~ (1 | catcher),
  data = din,
  family = binomial(link = 'probit'),
  nAGQ = 0
)
# Per-catcher "with or without you" (wowy) summary for a fitted probit model.
#
# For every row of `data`, the fitted probability is compared with the
# probability obtained after removing that row's catcher random intercept from
# the linear predictor. The per-catcher mean of that difference is `csaa`.
#
# The subtraction has to happen on the link (probit) scale, with pnorm()
# applied exactly once afterwards. The earlier version converted the
# predictions to probabilities first, then subtracted the probit-scale effect
# from them and applied pnorm() a second time.
#
# Args:
#   model: fitted glmer model with a probit link and a (1 | catcher) term.
#   data:  the data frame the model was fitted to; needs a `catcher` column
#          and must be row-aligned with predict(model).
#
# Returns:
#   A data frame with one row per catcher (row names are the catcher ids):
#     catcher - catcher id
#     value   - estimated catcher random intercept (probit scale)
#     csaa    - mean per-row change in fitted probability attributable to the
#               catcher
compute.catcher.csaa <- function(model, data) {
  catcher.effects <- ranef(model)$catcher
  catcher.ids <- row.names(catcher.effects)
  effect <- setNames(catcher.effects[[1]], catcher.ids)

  # predict() on a glmer fit returns the linear predictor by default.
  eta <- unname(predict(model))

  # Rows dropped while fitting (e.g. for NAs) would silently misalign the
  # predictions with data$catcher, so fail loudly instead.
  stopifnot(length(eta) == nrow(data))

  row.catcher <- as.character(data$catcher)
  p.with <- pnorm(eta)
  p.without <- pnorm(eta - unname(effect[row.catcher]))
  mean.gain <- tapply(p.with - p.without, row.catcher, mean)

  data.frame(
    catcher = catcher.ids,
    value = unname(effect),
    csaa = as.numeric(mean.gain[catcher.ids]),
    row.names = catcher.ids
  )
}

df.csaa.full <- compute.catcher.csaa(value.csaa.full, din)
df.csaa.0 <- compute.catcher.csaa(value.csaa.0, din)

# Scratch globals left in the state the original script ended with (they
# describe the baseline model), because later code reads model.predictions.
model <- value.csaa.0
rr <- ranef(model)
rrc <- row.names(rr$catcher)
model.predictions <- pnorm(predict(model))
df <- df.csaa.0
# Persist both fitted models and their per-catcher tables for this season.
# save() writes R's serialized object format, so the file must be read back
# with load() even though its name ends in ".R".
save(
  list = c("value.csaa.full", "value.csaa.0", "df.csaa.full", "df.csaa.0"),
  file = sprintf("csaa.%d.model.output.R", yr)
)

# Mean of the most recently computed model.predictions vector.
mean(model.predictions)