# throat clearing %load_ext rmagic %%R # my notebook is wider than the default 80 chars, so set it here: options(width=300) # get some packages if you do not have them: require(xts) %%R # Read data file; if you have it locally, do this: # dat <- read.csv("PreisMoatStanley_ScientificReports_3_1684_2013.dat",sep=" ") # else, workaround for curl/ssl download from github (see http://stackoverflow.com/a/4127133/164611 ) temporaryFile <- tempfile() download.file("https://raw.github.com/leesanglo/Replicating_google_trends/master/PreisMoatStanley_ScientificReports_3_1684_2013.dat", destfile=temporaryFile, method="curl") dat <- read.csv(temporaryFile,sep=" ") # peel off DJIA data, and make into an xts DJIA.data <- dat[,names(dat) %in% c("DJIA.Date","DJIA.Closing.Price")] dat <- dat[,!(names(dat) %in% names(DJIA.data))] # add +1 day to the date to round up to midnight. DJIA.xts <- xts(DJIA.data[,"DJIA.Closing.Price"], order.by=as.POSIXct(DJIA.data[,"DJIA.Date"])+86400) # peel off dates and make into an xts google.dates <- dat[,names(dat) %in% c("Google.Start.Date","Google.End.Date")] dat <- dat[,!(names(dat) %in% names(google.dates))] search.terms.xts <- xts(dat,order.by=as.POSIXct(google.dates[,"Google.End.Date"])) # clean up rm(list=c("DJIA.data","google.dates","dat")) %%R # this function takes a vector, and returns the difference # between a value and the mean value over the previous # boxwin values. running.center <- function(x,lag=10) { x.cum <- cumsum(x) x.dif <- c(x.cum[1:lag],diff(x.cum,lag)) x.df <- pmin(1:length(x.cum),lag) x.mu <- x.dif / x.df x.ret <- c(NaN,x[2:length(x)] - x.mu[1:(length(x)-1)]) return(x.ret) } # follow the authors in using a 3 week window: delta.t <- 3 # make the detrended 'signal' signal.xts <- xts(apply(search.terms.xts,2,running.center,lag=delta.t), order.by=time(search.terms.xts)) # at this point, do a spot check to make sure our function worked OK my.err <- signal.xts[delta.t+5,10] - (search.terms.xts[delta.t+5,10] - mean(search.terms.xts[5:(delta.t+4),10])) if (abs(my.err) > 1e-8) stop("fancy function miscomputes the running mean") # chop off the first delta.t rows signal.xts <- signal.xts[-c(1:delta.t)] mkt.xts <- DJIA.xts[-c(1:delta.t)] # and for the market # trading signal; the original authors 'short' the trend: trade.xts <- - signal.xts # break ties arbitrarily. anything smaller than a certain absolute # value gets moved to the tie-breaker. TIE.BREAKER <- 1e-5 TIE.LIMIT <- abs(TIE.BREAKER) trade.xts[abs(trade.xts) <= TIE.LIMIT] <- TIE.BREAKER %%R # braindead 'backtest' function dumb.bt <- function(sig.xts,mkt.xts) { if (dim(sig.xts)[1] != dim(mkt.xts)[1]) stop("wrong row sizes") mkt.lret <- diff(log(mkt.xts),lag=1) mkt.rret <- exp(mkt.lret) - 1 mkt.rret <- as.matrix(mkt.rret[-1]) # chop the first sig.xts <- sig.xts[-dim(sig.xts)[1]] # chop the last bt.rets <- xts(apply(sig.xts,2,function(v) { v * mkt.rret }), order.by=time(sig.xts)) return(bt.rets) } # backtest the sign: bt.rets <- dumb.bt(sign(trade.xts),mkt.xts) # instead, backtest the magnitude: #bt.rets <- dumb.bt(4 * trade.xts,mkt.xts) bt.lrets <- log(1 + bt.rets) # compute log returns bt.mtm <- exp(apply(bt.lrets,2,cumsum)) %%R -w 500 -h 500 # first: apply a t-test to every column, get the p-values ttest.pvals <- apply(bt.lrets,2,function(x) { t.res <- t.test(x,alternative="two.sided") p.v <- t.res$p.value }) # function for Q-Q plot against uniformity qqunif <- function(x,xlab="Theoretical Quantiles under Uniformity", ylab=NULL,...) { if (is.null(ylab)) ylab=paste("Sample Quantiles (",deparse(substitute(x)),")",sep="") qqplot(qunif(ppoints(length(x))),x,xlab=xlab,ylab=ylab,...) abline(0,1,col='red') } qqunif(ttest.pvals) %%R -w 500 -h 500 set.seed(12345) # remind me to change my luggage combo ;) rand.xts <- xts(matrix(rnorm(prod(dim(trade.xts))),nrow=dim(trade.xts)[1]), order.by=time(trade.xts)) # backtest the sign: rand.rets <- dumb.bt(sign(rand.xts),mkt.xts) rand.lrets <- log(1 + rand.rets) # compute log returns ttest.rand.pvals <- apply(rand.lrets,2,function(x) { t.res <- t.test(x,alternative="two.sided") p.v <- t.res$p.value }) qqunif(ttest.rand.pvals) %%R -w 500 -h 500 # perform oddsratio tests: require(epitools) # braindead 'odds-ratio backtest' function dumb.odds.bt <- function(sig.xts,mkt.xts) { if (dim(sig.xts)[1] != dim(mkt.xts)[1]) stop("wrong row sizes") mkt.lret <- diff(log(mkt.xts),lag=1) mkt.rret <- exp(mkt.lret) - 1 mkt.rret <- as.matrix(mkt.rret[-1]) # chop the first sig.xts <- sig.xts[-dim(sig.xts)[1]] # chop the last bt.rets <- apply(sig.xts,2,function(v) { or.tst <- oddsratio(x=factor(sign(v)),y=factor(sign(mkt.rret))) or.tst$p.value[2,"fisher.exact"]}) return(bt.rets) } # odd-ratio backtest the sign: bt.odds <- dumb.odds.bt(sign(trade.xts),mkt.xts) qqunif(bt.odds) %%R require(SharpeR) # under the latest github version of the package, this is legit, # but bonks under current CRAN version: #srs <- as.sr(bt.lrets) #sharpe.test <- sr_test(srs,alternative="two.sided") # this gives the same plot as the t-test plot above, so skip it. #qqunif(sharpe.test$p.value) # Hotelling's test big.sr <- as.sropt(bt.lrets) print(big.sr) print(confint(big.sr)) print(inference(big.sr,type="KRS")) %%R -w 500 -h 500 # split em. n.row <- dim(bt.lrets)[1] n.split <- floor(n.row/2) # you need the latest version of this: # require(devtools) # install_github('SharpeR',username='shabbychef) require(SharpeR) srs.is <- as.sr(bt.lrets[1:n.split,]) srs.oos <- as.sr(bt.lrets[(n.split+1):n.row,]) i.v.o <- lm(srs.oos$sr ~ srs.is$sr) print(summary(i.v.o)) plot(srs.is$sr,srs.oos$sr) abline(i.v.o) cat(sprintf("best in-sample strategy: '%s'\n",rownames(srs.is$sr)[which.max(srs.is$sr)])) cat(sprintf("worst in-sample strategy: '%s'\n",rownames(srs.is$sr)[which.min(srs.is$sr)])) %%R require(SharpeR) # these do *not* have equal SR; but the test # can reject for weird reasons... all.eq <- sr_equality_test(as.matrix(bt.lrets),type="F") print(all.eq) all.eq <- sr_equality_test(as.matrix(bt.lrets),type="chisq") print(all.eq)