# throat clearing
%load_ext rmagic

%%R
# my notebook is wider than the default 80 chars, so set it here:
options(width=300)
# get some packages if you do not have them:
require(xts)

%%R 

# Read data file; if you have it locally, do this:
# dat <- read.csv("PreisMoatStanley_ScientificReports_3_1684_2013.dat",sep=" ")
# else, workaround for curl/ssl download from github (see http://stackoverflow.com/a/4127133/164611 )
temporaryFile <- tempfile()
download.file("https://raw.github.com/leesanglo/Replicating_google_trends/master/PreisMoatStanley_ScientificReports_3_1684_2013.dat",
        destfile=temporaryFile, method="curl")
dat <- read.csv(temporaryFile,sep=" ")

# peel off DJIA data, and make into an xts
DJIA.data <- dat[,names(dat) %in% c("DJIA.Date","DJIA.Closing.Price")]
dat <- dat[,!(names(dat) %in% names(DJIA.data))]
# add +1 day to the date to round up to midnight.
DJIA.xts <- xts(DJIA.data[,"DJIA.Closing.Price"],
								order.by=as.POSIXct(DJIA.data[,"DJIA.Date"])+86400)

# peel off dates and make into an xts
google.dates <- dat[,names(dat) %in% c("Google.Start.Date","Google.End.Date")]
dat <- dat[,!(names(dat) %in% names(google.dates))]
search.terms.xts <- xts(dat,order.by=as.POSIXct(google.dates[,"Google.End.Date"]))

# clean up
rm(list=c("DJIA.data","google.dates","dat"))

%%R
# this function takes a vector, and returns the difference
# between a value and the mean value over the previous
# boxwin values.
running.center <- function(x,lag=10) {
	x.cum <- cumsum(x)
	x.dif <- c(x.cum[1:lag],diff(x.cum,lag))
	x.df  <- pmin(1:length(x.cum),lag)
	x.mu  <- x.dif / x.df
	x.ret <- c(NaN,x[2:length(x)] - x.mu[1:(length(x)-1)])
	return(x.ret)
}
# follow the authors in using a 3 week window: 
delta.t <- 3

# make the detrended 'signal'
signal.xts <- xts(apply(search.terms.xts,2,running.center,lag=delta.t),
						 order.by=time(search.terms.xts))
# at this point, do a spot check to make sure our function worked OK
my.err <- signal.xts[delta.t+5,10] - 
					(search.terms.xts[delta.t+5,10] - mean(search.terms.xts[5:(delta.t+4),10]))
if (abs(my.err) > 1e-8)
	stop("fancy function miscomputes the running mean")
# chop off the first delta.t rows
signal.xts <- signal.xts[-c(1:delta.t)]
mkt.xts <- DJIA.xts[-c(1:delta.t)]  # and for the market

# trading signal; the original authors 'short' the trend:
trade.xts <- - signal.xts
# break ties arbitrarily. anything smaller than a certain absolute 
# value gets moved to the tie-breaker. 
TIE.BREAKER <- 1e-5
TIE.LIMIT <- abs(TIE.BREAKER)
trade.xts[abs(trade.xts) <= TIE.LIMIT] <- TIE.BREAKER

%%R
# braindead 'backtest' function
dumb.bt <- function(sig.xts,mkt.xts) {
	if (dim(sig.xts)[1] != dim(mkt.xts)[1])
		stop("wrong row sizes")
	mkt.lret <- diff(log(mkt.xts),lag=1)
	mkt.rret <- exp(mkt.lret) - 1
	mkt.rret <- as.matrix(mkt.rret[-1])  # chop the first
	sig.xts  <- sig.xts[-dim(sig.xts)[1]]  # chop the last
	bt.rets <- xts(apply(sig.xts,2,function(v) { v * mkt.rret }),
								 order.by=time(sig.xts))
	return(bt.rets)
}
# backtest the sign:
bt.rets <- dumb.bt(sign(trade.xts),mkt.xts)
# instead, backtest the magnitude:
#bt.rets <- dumb.bt(4 * trade.xts,mkt.xts)
bt.lrets <- log(1 + bt.rets)  # compute log returns
bt.mtm <- exp(apply(bt.lrets,2,cumsum))

%%R -w 500 -h 500 

# first: apply a t-test to every column, get the p-values
ttest.pvals <- apply(bt.lrets,2,function(x) { 
						t.res <- t.test(x,alternative="two.sided")
						p.v <- t.res$p.value
								 })
# function for Q-Q plot against uniformity
qqunif <- function(x,xlab="Theoretical Quantiles under Uniformity",
									 ylab=NULL,...) {
	if (is.null(ylab))
		ylab=paste("Sample Quantiles (",deparse(substitute(x)),")",sep="")
	qqplot(qunif(ppoints(length(x))),x,xlab=xlab,ylab=ylab,...)
	abline(0,1,col='red')
}
qqunif(ttest.pvals)

%%R -w 500 -h 500

set.seed(12345)  # remind me to change my luggage combo ;)
rand.xts <- xts(matrix(rnorm(prod(dim(trade.xts))),nrow=dim(trade.xts)[1]),
                order.by=time(trade.xts))
# backtest the sign:
rand.rets <- dumb.bt(sign(rand.xts),mkt.xts)
rand.lrets <- log(1 + rand.rets)  # compute log returns

ttest.rand.pvals <- apply(rand.lrets,2,function(x) { 
						t.res <- t.test(x,alternative="two.sided")
						p.v <- t.res$p.value
                                })
qqunif(ttest.rand.pvals)

%%R -w 500 -h 500

# perform oddsratio tests:
require(epitools)
# braindead 'odds-ratio backtest' function
dumb.odds.bt <- function(sig.xts,mkt.xts) {
	if (dim(sig.xts)[1] != dim(mkt.xts)[1])
		stop("wrong row sizes")
	mkt.lret <- diff(log(mkt.xts),lag=1)
	mkt.rret <- exp(mkt.lret) - 1
	mkt.rret <- as.matrix(mkt.rret[-1])  # chop the first
	sig.xts  <- sig.xts[-dim(sig.xts)[1]]  # chop the last
	bt.rets <- apply(sig.xts,2,function(v) { 
        or.tst <- oddsratio(x=factor(sign(v)),y=factor(sign(mkt.rret)))
        or.tst$p.value[2,"fisher.exact"]})
	return(bt.rets)
}
# odd-ratio backtest the sign:
bt.odds <- dumb.odds.bt(sign(trade.xts),mkt.xts)
qqunif(bt.odds)

%%R 
require(SharpeR)

# under the latest github version of the package, this is legit,
# but bonks under current CRAN version:
#srs <- as.sr(bt.lrets)
#sharpe.test <- sr_test(srs,alternative="two.sided")
# this gives the same plot as the t-test plot above, so skip it.
#qqunif(sharpe.test$p.value)

# Hotelling's test
big.sr <- as.sropt(bt.lrets)
print(big.sr)
print(confint(big.sr))
print(inference(big.sr,type="KRS"))

%%R -w 500 -h 500

# split em.
n.row <- dim(bt.lrets)[1]
n.split <- floor(n.row/2)

# you need the latest version of this:
# require(devtools)
# install_github('SharpeR',username='shabbychef)
require(SharpeR)
srs.is <- as.sr(bt.lrets[1:n.split,])
srs.oos <- as.sr(bt.lrets[(n.split+1):n.row,])
i.v.o <- lm(srs.oos$sr ~ srs.is$sr)
print(summary(i.v.o))
plot(srs.is$sr,srs.oos$sr)
abline(i.v.o)

cat(sprintf("best in-sample strategy: '%s'\n",rownames(srs.is$sr)[which.max(srs.is$sr)]))
cat(sprintf("worst in-sample strategy: '%s'\n",rownames(srs.is$sr)[which.min(srs.is$sr)]))

%%R
require(SharpeR)

# these do *not* have equal SR; but the test
# can reject for weird reasons...
all.eq <- sr_equality_test(as.matrix(bt.lrets),type="F")
print(all.eq)
all.eq <- sr_equality_test(as.matrix(bt.lrets),type="chisq")
print(all.eq)