%%bash
mkdir -p analysis_pyrad/
mkdir -p analysis_dtests/
mkdir -p analysis_raxml/
mkdir -p analysis_mrbayes/

%%bash
## create template file
~/Dropbox/pyrad-git/pyRAD -n
mv params.txt analysis_pyrad/params.txt

## substitute in parameters
sed -i '/## 1. /c\analysis_pyrad/_c90d5     ## 1. working directory ' analysis_pyrad/params.txt
sed -i '/## 2. /c\/home/deren/Documents/RADSEQ_DATA/Carex/*.fastq             ## 2. data file         ' analysis_pyrad/params.txt
sed -i '/## 3. /c\/home/deren/Documents/RADSEQ_DATA/barcodes/Carex1.barcodes  ## 3. barcodes file     ' analysis_pyrad/params.txt
sed -i '/## 7. /c\10                    ## 7. use multiple cores ' analysis_pyrad/params.txt
sed -i '/## 8. /c\5                     ## 8. set mindepth   ' analysis_pyrad/params.txt
sed -i '/## 9. /c\3                     ## 9. max lowQual   ' analysis_pyrad/params.txt
sed -i '/## 10. /c\.90                  ## 10. decreased clust threshold ' analysis_pyrad/params.txt
sed -i '/## 11. /c\gbs                  ## 11. changed data type ' analysis_pyrad/params.txt
sed -i '/## 14. /c\c90d5m4p3            ## 14. output name '       analysis_pyrad/params.txt
sed -i '/## 21./c\2                     ## 21. Filter type '       analysis_pyrad/params.txt
sed -i '/## 23./c\3                     ## 23. maxH consens '      analysis_pyrad/params.txt
sed -i '/## 24./c\3                     ## 24. maxN consens '      analysis_pyrad/params.txt
sed -i '/## 29./c\2,2                   ## 29. trim overhang edges ' analysis_pyrad/params.txt
sed -i '/## 30./c\n                     ## 30. add output formats '  analysis_pyrad/params.txt
sed -i '/## 34./c\99999                 ## 34. no max stack size '   analysis_pyrad/params.txt

%%bash
## create sub working directory
mkdir -p analysis_pyrad/_c90d5/

## demultiplex data files
~/Dropbox/pyrad-git/pyRAD -p analysis_pyrad/params.txt -s 1   2&> /dev/null

%%bash
## assemble data sets
~/Dropbox/pyrad-git/pyRAD -p analysis_pyrad/params.txt -s 234567   2&> /dev/null

## print summary of results
cat analysis_pyrad/_c90d5/stats/c90d5m4p3.stats

%%bash
## create sub working directory
mkdir -p analysis_pyrad/_c85d5/

## set parameters in params file
sed -i '/## 1. /c\analysis_pyrad/_c85d5     ## 1. working directory ' analysis_pyrad/params.txt
sed -i '/## 10./c\.85                       ## 10. decreased clust threshold ' analysis_pyrad/params.txt
sed -i '/## 14./c\c85d5m4p3                 ## 14. output name '   analysis_pyrad/params.txt
sed -i '/## 18./c\analysis_pyrad/_c90d5/fastq/*.gz    ## 18. use de-multiplexed data '   analysis_pyrad/params.txt

%%bash
~/Dropbox/pyrad-git/pyRAD -p analysis_pyrad/params.txt -s 234567  2&> /dev/null
cat analysis_pyrad/_c85d5/stats/c85d5m4p3.stats

%%bash
## create sub working directory
mkdir -p analysis_pyrad/_c85d2/

## copy re-usable files from _c85d5/
cp -r analysis_pyrad/_c85d5/stats/ analysis_pyrad/_c85d2/
cp -r analysis_pyrad/_c85d5/clust.85/ analysis_pyrad/_c85d2/

## remove consens files b/c they will be remade at lower mindepth
rm analysis_pyrad/_c85d2/clust.85/*consens*

## set new parameters in params file
sed -i '/## 1. /c\analysis_pyrad/_c85d2       ## 1. working directory ' analysis_pyrad/params.txt
sed -i '/## 8. /c\2                           ## 8. set mindepth ' analysis_pyrad/params.txt
sed -i '/## 14./c\c85d2m4p3                   ## 14. output name ' analysis_pyrad/params.txt
sed -i '/## 31./c\1                           ## 31. allow lowdepth base calls ' analysis_pyrad/params.txt

%%bash
~/Dropbox/pyrad-git/pyRAD -p analysis_pyrad/params.txt -s 567 2&> /dev/null
cat analysis_pyrad/_c85d2/stats/c85d2m4p3.stats

%%bash
raxmlHPC-PTHREADS-AVX -f a \
                      -p 12345 \
                      -s analysis_pyrad/_c90d5/outfiles/c90d5m4p3.phy \
                      -x 12345 -N 100 \
                      -m GTRGAMMA \
                      -n c90d5m4p3 \
                      -o 'stramin' \
                      -w /home/deren/Documents/Marcial/Carex/analysis_raxml/ \
                      -T 5   2&> /dev/null

%%bash
raxmlHPC-PTHREADS-AVX -f a \
                      -p 12345 \
                      -s analysis_pyrad/_c85d5/outfiles/c85d5m4p3.phy \
                      -x 12345 -N 100 \
                      -m GTRGAMMA \
                      -n c85d5m4p3 \
                      -o 'stramin' \
                      -w /home/deren/Documents/Marcial/Carex/analysis_raxml/ \
                      -T 5   2&> /dev/null

%%bash
raxmlHPC-PTHREADS-AVX -f a \
                      -p 12345 \
                      -s analysis_pyrad/_c85d2/outfiles/c85d2m4p3.phy \
                      -x 12345 -N 100 \
                      -m GTRGAMMA \
                      -n c85d2m4p3 \
                      -o 'stramin' \
                      -w /home/deren/Documents/Marcial/Carex/analysis_raxml/ \
                      -T 5   2&> /dev/null

## create a nexus string with parameters for mrbayes run
nexinfo = """
\nbegin mrbayes;
lset nst=6 rates=invgamma;
set autoclose=yes;
mcmc ngen=10000000 printfreq=1000 samplefreq=1000 nchains=4 savebrlens=yes;
sump;
sumt;
end; 
"""

## append mrbayes nexus info to the end of nexus sequence files
! echo '$nexinfo' >> analysis_pyrad/_c90d5/outfiles/c90d5m4p3.nex
! echo '$nexinfo' >> analysis_pyrad/_c85d5/outfiles/c85d5m4p3.nex
! echo '$nexinfo' >> analysis_pyrad/_c85d2/outfiles/c85d2m4p3.nex


## call mrbayes on each nexus file data set
## Do two runs for each data set
quiet = ! mb analysis_pyrad/_c90d5/outfiles/c90d5m4p3.nex
quiet = ! mb analysis_pyrad/_c85d5/outfiles/c85d5m4p3.nex
quiet = ! mb analysis_pyrad/_c85d2/outfiles/c85d2m4p3.nex
#quiet = ! mpirun -np 6 analysis_pyrad/_c85d2/outfiles/c85d2m4p3.nex

## clean up, move files to mrbayes directory
! mv /home/deren/Documents/Marcial/Carex/analysis_pyrad/_c90d5/outfiles/c90d5m4p3.nex.* analysis_mrbayes/
! mv /home/deren/Documents/Marcial/Carex/analysis_pyrad/_c85d5/outfiles/c85d5m4p3.nex.* analysis_mrbayes/
! mv /home/deren/Documents/Marcial/Carex/analysis_pyrad/_c85d2/outfiles/c85d2m4p3.nex.* analysis_mrbayes/

%%bash
## create Dtest template file
~/Dropbox/pyrad-git/pyRAD -D > analysis_dtests/input.D4

%%bash
## substitute in new parameters
sed -i '/## N bootstrap/c\200                         ## N bootstrap ' analysis_dtests/input.D4
sed -i '/## loc/c\analysis_pyrad/_c85d2/outfiles/c85d2m4p3.loci     ## loc/path ' analysis_dtests/input.D4
sed -i '/## output file /c\analysis_dtests/test1     ## output file ' analysis_dtests/input.D4
sed -i '/## N cores/c\4                           ## N cores ' analysis_dtests/input.D4

CS = ['2816','2830D','2893G1']
W = ['waponah']
CSU = ['suberec'] 
CL = ['longii']
CV = ['vexans']
CA = ['albolut']
S = ['stramin']

def makeD4(P1,P2,P3,O,poly,infile):
    ! head -n 8 $infile > temp
    ! mv temp $infile
    tests = []
    if not poly:
        for o in O:
            for p3 in P3:
                for p2 in P2:
                    for p1 in P1:
                        if p1 != p2:
                            if set((p1,p2,p3,o)) not in tests:
                                tests.append(set((p1,p2,p3,o)))
                                test = '%s\t%s\t%s\t%s\t##' % (p1,p2,p3,o)
                                ## appends tests to input file
                                ! echo '$test' >> analysis_dtests/input.D4
    else:
        for p3 in P3:
            for p2 in P2:
                for p1 in P1:
                    if p1 != p2:
                        if set((p1,p2,p3)) not in tests:
                            tests.append(set((p1,p2,p3)))
                            test = '[%s]\t[%s]\t[%s]\t[%s]\t##' % (p1,p2,p3,",".join(O))
                            ## appends tests to input file
                            ! echo '$test' >> analysis_dtests/input.D4

# make and print input file               
makeD4(CS,CS+W,CSU+CL+CV+CA,S,0,"analysis_dtests/input.D4")
! cat analysis_dtests/input.D4

quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4

## change output file name
! sed -i '/## output file /c\analysis_dtests/test1.H   ## output file ' analysis_dtests/input.D4
## make new input file
makeD4(CS,CS,CSU+CL+CV+CA,S,1,"analysis_dtests/input.D4")
## run the test
quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4
! cat analysis_dtests/test1.D4.txt

## change output file name
! sed -i '/## output file /c\analysis_dtests/test2     ## output file ' analysis_dtests/input.D4
## make new input file
makeD4(CS,CS,W,S+CL+CV+CA,0,"analysis_dtests/input.D4")
## run the test
quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4
! cat analysis_dtests/test2.D4.txt

## change output file name
! sed -i '/## output file /c\analysis_dtests/test2.H     ## output file ' analysis_dtests/input.D4
## make new input file
makeD4(CS,CS,W,S+CL+CV+CA,1,"analysis_dtests/input.D4")
## run the test
quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4

## change output file name
! sed -i '/## output file /c\analysis_dtests/test3     ## output file ' analysis_dtests/input.D4
## make new input file
makeD4(CL,CV,CA+CSU,S+CS,0,"analysis_dtests/input.D4")
## run the test
quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4
! cat analysis_dtests/test3.D4.txt

## change output file name
! sed -i '/## output file /c\analysis_dtests/test3.H     ## output file ' analysis_dtests/input.D4
## make new input file
makeD4(CL,CV,CA+CSU,S+CS,1,"analysis_dtests/input.D4")
## run the test
quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4

## If executing this code in the IPython notebook 
## you will need to have the python library python-Rpy2 installed
## otherwise you can execute the code below in a separate R shell
%load_ext rmagic

%%R
library(ape)
library(RADami)

%%R
tre <- read.tree("analysis_raxml/RAxML_bestTree.c90d5m4p3")
boots <- read.tree("analysis_raxml/RAxML_bipartitions.c90d5m4p3")
plot(ladderize(tre))
nodelabels(boots$node.label, bg='grey')

%%R
tre <- read.tree("analysis_raxml/RAxML_bestTree.c85d5m4p3")
boots <- read.tree("analysis_raxml/RAxML_bipartitions.c85d5m4p3")
tre <- ladderize(rotate(tre,11))
plot(tre)
nodelabels(boots$node.label, bg='grey')

%%R
tre <- read.tree("analysis_raxml/RAxML_bestTree.c85d2m4p3")
boots <- read.tree("analysis_raxml/RAxML_bipartitions.c85d2m4p3")
plot(ladderize(tre))
nodelabels(boots$node.label, bg='grey')

%%R

# Partitioned RAD analysis of the Carex scoparia trees
# 2014-06-06

# First, prune to the ingroup taxa of relevance to your question.
# Then, generate a treeset using NNI:

scop.tre.c85d2 <- read.tree('../R1.pyRAD/analysis_raxml/RAxML_bestTree.c85d2m4p3')
scop.tre.c85d5 <- read.tree('../R1.pyRAD/analysis_raxml/RAxML_bestTree.c85d5m4p3')
scop.tre.c90d5 <- read.tree('../R1.pyRAD/analysis_raxml/RAxML_bestTree.c90d5m4p3')

scop.tre.c85d2$tip.label <- paste('>', scop.tre.c85d2$tip.label, sep = '')
scop.tre.c85d5$tip.label <- paste('>', scop.tre.c85d5$tip.label, sep = '')
scop.tre.c90d5$tip.label <- paste('>', scop.tre.c90d5$tip.label, sep = '')

scop.tre.c85d2 <- drop.tip(scop.tre.c85d2, '>stramin')
scop.tre.c85d5 <- drop.tip(scop.tre.c85d5, '>stramin')
scop.tre.c90d5 <- drop.tip(scop.tre.c90d5, '>stramin')

scop.nni.c85d2 <- genTrees(scop.tre.c85d2, maxmoves=3, N = 200, filebase = 'scop.nni.c85d2', perms = c(length(nni(scop.tre.c85d2)), 150, 150))
scop.nni.c85d5 <- genTrees(scop.tre.c85d5, maxmoves=3, N = 200, filebase = 'scop.nni.c85d5', perms = c(length(nni(scop.tre.c85d5)), 150, 150))
scop.nni.c90d5 <- genTrees(scop.tre.c90d5, maxmoves=3, N = 200, filebase = 'scop.nni.c90d5', perms = c(length(nni(scop.tre.c90d5)), 150, 150))

%%R

# then, generate datasets for each of your treesets:

scop.rad.c85d2 <- read.pyRAD('../R1.pyRAD/analysis_pyrad/_c85d2/outfiles/c85d2m4p3.loci')
scop.rad.c85d5 <- read.pyRAD('../R1.pyRAD/analysis_pyrad/_c85d5/outfiles/c85d5m4p3.loci')
scop.rad.c90d5 <- read.pyRAD('../R1.pyRAD/analysis_pyrad/_c90d5/outfiles/c90d5m4p3.loci')

gen.RAD.loci.datasets(scop.rad.c85d2, scop.nni.c85d2, fileBase = 'c85d2', cores = 10, taxa = scop.nni.c85d2[[1]]$tip.label)
gen.RAD.loci.datasets(scop.rad.c85d5, scop.nni.c85d5, fileBase = 'c85d5', cores = 10, taxa = scop.nni.c85d2[[1]]$tip.label)
gen.RAD.loci.datasets(scop.rad.c90d5, scop.nni.c90d5, fileBase = 'c90d5', cores = 10, taxa = scop.nni.c85d2[[1]]$tip.label)

%%R

# Each resulting dataset will have a shell script associated with it in the folder numbered '.0', and this should be executed 
# outside of R to get the RAxML output needed for the next steps of analysis. You can close or open R as you see fit, but if
# you close it, save the workspace (it will make your life easier in the next steps, and I have assumed in the next lines that
# you are working in the still-open workspace, or loaded the saved workspace and re-attached RADami). Read in the likelihood files,
# ranking the trees by likelihood:

scop.matched.c85d5 <- match.lnL.to.trees('c85d5.0')
scop.matched.c85d2 <- match.lnL.to.trees('c85d2.0')
scop.matched.c90d5 <- match.lnL.to.trees('c90d5.0')

# Then, plot them:

plot(scop.matched.c85d2, fileprefix='c85d2', panels = c('bestMat', 'worstMat', 'bestMat'), ci = rep(0.95, 3), regression = c(F, F, T), 
      lnL.break = c(-1E10, -1E10, -374000), pch = 3)
plot(scop.matched.c90d5, fileprefix='c90d5', panels = c('bestMat', 'worstMat', 'bestMat'), ci = rep(0.95, 3), regression = c(F, F, T), 
      lnL.break = c(-1E10, -1E10, -253350), pch = 3)
plot(scop.matched.c85d5, fileprefix='c85d5', panels = c('bestMat', 'worstMat'), ci = rep(0.95, 2), regression = c(T, F), 
      lnL.break = c(-1E10, -1E10), pch = 3)

# and look at the outliers:

pdf('c90d5.trees.pdf', width = 8.5, height = 11)
layout(matrix(1:6, 3, 2))
for(i in c(1, which(colSums(scop.ranks.c90d5$bestMat) > 59))) {
  plot(scop.nni.c90d5[[i]])
  title(main = paste('c90d5, tree',i))
  }
dev.off()