%%bash mkdir -p analysis_pyrad/ mkdir -p analysis_dtests/ mkdir -p analysis_raxml/ mkdir -p analysis_mrbayes/ %%bash ## create template file ~/Dropbox/pyrad-git/pyRAD -n mv params.txt analysis_pyrad/params.txt ## substitute in parameters sed -i '/## 1. /c\analysis_pyrad/_c90d5 ## 1. working directory ' analysis_pyrad/params.txt sed -i '/## 2. /c\/home/deren/Documents/RADSEQ_DATA/Carex/*.fastq ## 2. data file ' analysis_pyrad/params.txt sed -i '/## 3. /c\/home/deren/Documents/RADSEQ_DATA/barcodes/Carex1.barcodes ## 3. barcodes file ' analysis_pyrad/params.txt sed -i '/## 7. /c\10 ## 7. use multiple cores ' analysis_pyrad/params.txt sed -i '/## 8. /c\5 ## 8. set mindepth ' analysis_pyrad/params.txt sed -i '/## 9. /c\3 ## 9. max lowQual ' analysis_pyrad/params.txt sed -i '/## 10. /c\.90 ## 10. decreased clust threshold ' analysis_pyrad/params.txt sed -i '/## 11. /c\gbs ## 11. changed data type ' analysis_pyrad/params.txt sed -i '/## 14. /c\c90d5m4p3 ## 14. output name ' analysis_pyrad/params.txt sed -i '/## 21./c\2 ## 21. Filter type ' analysis_pyrad/params.txt sed -i '/## 23./c\3 ## 23. maxH consens ' analysis_pyrad/params.txt sed -i '/## 24./c\3 ## 24. maxN consens ' analysis_pyrad/params.txt sed -i '/## 29./c\2,2 ## 29. trim overhang edges ' analysis_pyrad/params.txt sed -i '/## 30./c\n ## 30. add output formats ' analysis_pyrad/params.txt sed -i '/## 34./c\99999 ## 34. no max stack size ' analysis_pyrad/params.txt %%bash ## create sub working directory mkdir -p analysis_pyrad/_c90d5/ ## demultiplex data files ~/Dropbox/pyrad-git/pyRAD -p analysis_pyrad/params.txt -s 1 2&> /dev/null %%bash ## assemble data sets ~/Dropbox/pyrad-git/pyRAD -p analysis_pyrad/params.txt -s 234567 2&> /dev/null ## print summary of results cat analysis_pyrad/_c90d5/stats/c90d5m4p3.stats %%bash ## create sub working directory mkdir -p analysis_pyrad/_c85d5/ ## set parameters in params file sed -i '/## 1. /c\analysis_pyrad/_c85d5 ## 1. working directory ' analysis_pyrad/params.txt sed -i '/## 10./c\.85 ## 10. decreased clust threshold ' analysis_pyrad/params.txt sed -i '/## 14./c\c85d5m4p3 ## 14. output name ' analysis_pyrad/params.txt sed -i '/## 18./c\analysis_pyrad/_c90d5/fastq/*.gz ## 18. use de-multiplexed data ' analysis_pyrad/params.txt %%bash ~/Dropbox/pyrad-git/pyRAD -p analysis_pyrad/params.txt -s 234567 2&> /dev/null cat analysis_pyrad/_c85d5/stats/c85d5m4p3.stats %%bash ## create sub working directory mkdir -p analysis_pyrad/_c85d2/ ## copy re-usable files from _c85d5/ cp -r analysis_pyrad/_c85d5/stats/ analysis_pyrad/_c85d2/ cp -r analysis_pyrad/_c85d5/clust.85/ analysis_pyrad/_c85d2/ ## remove consens files b/c they will be remade at lower mindepth rm analysis_pyrad/_c85d2/clust.85/*consens* ## set new parameters in params file sed -i '/## 1. /c\analysis_pyrad/_c85d2 ## 1. working directory ' analysis_pyrad/params.txt sed -i '/## 8. /c\2 ## 8. set mindepth ' analysis_pyrad/params.txt sed -i '/## 14./c\c85d2m4p3 ## 14. output name ' analysis_pyrad/params.txt sed -i '/## 31./c\1 ## 31. allow lowdepth base calls ' analysis_pyrad/params.txt %%bash ~/Dropbox/pyrad-git/pyRAD -p analysis_pyrad/params.txt -s 567 2&> /dev/null cat analysis_pyrad/_c85d2/stats/c85d2m4p3.stats %%bash raxmlHPC-PTHREADS-AVX -f a \ -p 12345 \ -s analysis_pyrad/_c90d5/outfiles/c90d5m4p3.phy \ -x 12345 -N 100 \ -m GTRGAMMA \ -n c90d5m4p3 \ -o 'stramin' \ -w /home/deren/Documents/Marcial/Carex/analysis_raxml/ \ -T 5 2&> /dev/null %%bash raxmlHPC-PTHREADS-AVX -f a \ -p 12345 \ -s analysis_pyrad/_c85d5/outfiles/c85d5m4p3.phy \ -x 12345 -N 100 \ -m GTRGAMMA \ -n c85d5m4p3 \ -o 'stramin' \ -w /home/deren/Documents/Marcial/Carex/analysis_raxml/ \ -T 5 2&> /dev/null %%bash raxmlHPC-PTHREADS-AVX -f a \ -p 12345 \ -s analysis_pyrad/_c85d2/outfiles/c85d2m4p3.phy \ -x 12345 -N 100 \ -m GTRGAMMA \ -n c85d2m4p3 \ -o 'stramin' \ -w /home/deren/Documents/Marcial/Carex/analysis_raxml/ \ -T 5 2&> /dev/null ## create a nexus string with parameters for mrbayes run nexinfo = """ \nbegin mrbayes; lset nst=6 rates=invgamma; set autoclose=yes; mcmc ngen=10000000 printfreq=1000 samplefreq=1000 nchains=4 savebrlens=yes; sump; sumt; end; """ ## append mrbayes nexus info to the end of nexus sequence files ! echo '$nexinfo' >> analysis_pyrad/_c90d5/outfiles/c90d5m4p3.nex ! echo '$nexinfo' >> analysis_pyrad/_c85d5/outfiles/c85d5m4p3.nex ! echo '$nexinfo' >> analysis_pyrad/_c85d2/outfiles/c85d2m4p3.nex ## call mrbayes on each nexus file data set ## Do two runs for each data set quiet = ! mb analysis_pyrad/_c90d5/outfiles/c90d5m4p3.nex quiet = ! mb analysis_pyrad/_c85d5/outfiles/c85d5m4p3.nex quiet = ! mb analysis_pyrad/_c85d2/outfiles/c85d2m4p3.nex #quiet = ! mpirun -np 6 analysis_pyrad/_c85d2/outfiles/c85d2m4p3.nex ## clean up, move files to mrbayes directory ! mv /home/deren/Documents/Marcial/Carex/analysis_pyrad/_c90d5/outfiles/c90d5m4p3.nex.* analysis_mrbayes/ ! mv /home/deren/Documents/Marcial/Carex/analysis_pyrad/_c85d5/outfiles/c85d5m4p3.nex.* analysis_mrbayes/ ! mv /home/deren/Documents/Marcial/Carex/analysis_pyrad/_c85d2/outfiles/c85d2m4p3.nex.* analysis_mrbayes/ %%bash ## create Dtest template file ~/Dropbox/pyrad-git/pyRAD -D > analysis_dtests/input.D4 %%bash ## substitute in new parameters sed -i '/## N bootstrap/c\200 ## N bootstrap ' analysis_dtests/input.D4 sed -i '/## loc/c\analysis_pyrad/_c85d2/outfiles/c85d2m4p3.loci ## loc/path ' analysis_dtests/input.D4 sed -i '/## output file /c\analysis_dtests/test1 ## output file ' analysis_dtests/input.D4 sed -i '/## N cores/c\4 ## N cores ' analysis_dtests/input.D4 CS = ['2816','2830D','2893G1'] W = ['waponah'] CSU = ['suberec'] CL = ['longii'] CV = ['vexans'] CA = ['albolut'] S = ['stramin'] def makeD4(P1,P2,P3,O,poly,infile): ! head -n 8 $infile > temp ! mv temp $infile tests = [] if not poly: for o in O: for p3 in P3: for p2 in P2: for p1 in P1: if p1 != p2: if set((p1,p2,p3,o)) not in tests: tests.append(set((p1,p2,p3,o))) test = '%s\t%s\t%s\t%s\t##' % (p1,p2,p3,o) ## appends tests to input file ! echo '$test' >> analysis_dtests/input.D4 else: for p3 in P3: for p2 in P2: for p1 in P1: if p1 != p2: if set((p1,p2,p3)) not in tests: tests.append(set((p1,p2,p3))) test = '[%s]\t[%s]\t[%s]\t[%s]\t##' % (p1,p2,p3,",".join(O)) ## appends tests to input file ! echo '$test' >> analysis_dtests/input.D4 # make and print input file makeD4(CS,CS+W,CSU+CL+CV+CA,S,0,"analysis_dtests/input.D4") ! cat analysis_dtests/input.D4 quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4 ## change output file name ! sed -i '/## output file /c\analysis_dtests/test1.H ## output file ' analysis_dtests/input.D4 ## make new input file makeD4(CS,CS,CSU+CL+CV+CA,S,1,"analysis_dtests/input.D4") ## run the test quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4 ! cat analysis_dtests/test1.D4.txt ## change output file name ! sed -i '/## output file /c\analysis_dtests/test2 ## output file ' analysis_dtests/input.D4 ## make new input file makeD4(CS,CS,W,S+CL+CV+CA,0,"analysis_dtests/input.D4") ## run the test quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4 ! cat analysis_dtests/test2.D4.txt ## change output file name ! sed -i '/## output file /c\analysis_dtests/test2.H ## output file ' analysis_dtests/input.D4 ## make new input file makeD4(CS,CS,W,S+CL+CV+CA,1,"analysis_dtests/input.D4") ## run the test quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4 ## change output file name ! sed -i '/## output file /c\analysis_dtests/test3 ## output file ' analysis_dtests/input.D4 ## make new input file makeD4(CL,CV,CA+CSU,S+CS,0,"analysis_dtests/input.D4") ## run the test quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4 ! cat analysis_dtests/test3.D4.txt ## change output file name ! sed -i '/## output file /c\analysis_dtests/test3.H ## output file ' analysis_dtests/input.D4 ## make new input file makeD4(CL,CV,CA+CSU,S+CS,1,"analysis_dtests/input.D4") ## run the test quiet = ! ~/Dropbox/pyrad-git/pyRAD -d analysis_dtests/input.D4 ## If executing this code in the IPython notebook ## you will need to have the python library python-Rpy2 installed ## otherwise you can execute the code below in a separate R shell %load_ext rmagic %%R library(ape) library(RADami) %%R tre <- read.tree("analysis_raxml/RAxML_bestTree.c90d5m4p3") boots <- read.tree("analysis_raxml/RAxML_bipartitions.c90d5m4p3") plot(ladderize(tre)) nodelabels(boots$node.label, bg='grey') %%R tre <- read.tree("analysis_raxml/RAxML_bestTree.c85d5m4p3") boots <- read.tree("analysis_raxml/RAxML_bipartitions.c85d5m4p3") tre <- ladderize(rotate(tre,11)) plot(tre) nodelabels(boots$node.label, bg='grey') %%R tre <- read.tree("analysis_raxml/RAxML_bestTree.c85d2m4p3") boots <- read.tree("analysis_raxml/RAxML_bipartitions.c85d2m4p3") plot(ladderize(tre)) nodelabels(boots$node.label, bg='grey') %%R # Partitioned RAD analysis of the Carex scoparia trees # 2014-06-06 # First, prune to the ingroup taxa of relevance to your question. # Then, generate a treeset using NNI: scop.tre.c85d2 <- read.tree('../R1.pyRAD/analysis_raxml/RAxML_bestTree.c85d2m4p3') scop.tre.c85d5 <- read.tree('../R1.pyRAD/analysis_raxml/RAxML_bestTree.c85d5m4p3') scop.tre.c90d5 <- read.tree('../R1.pyRAD/analysis_raxml/RAxML_bestTree.c90d5m4p3') scop.tre.c85d2$tip.label <- paste('>', scop.tre.c85d2$tip.label, sep = '') scop.tre.c85d5$tip.label <- paste('>', scop.tre.c85d5$tip.label, sep = '') scop.tre.c90d5$tip.label <- paste('>', scop.tre.c90d5$tip.label, sep = '') scop.tre.c85d2 <- drop.tip(scop.tre.c85d2, '>stramin') scop.tre.c85d5 <- drop.tip(scop.tre.c85d5, '>stramin') scop.tre.c90d5 <- drop.tip(scop.tre.c90d5, '>stramin') scop.nni.c85d2 <- genTrees(scop.tre.c85d2, maxmoves=3, N = 200, filebase = 'scop.nni.c85d2', perms = c(length(nni(scop.tre.c85d2)), 150, 150)) scop.nni.c85d5 <- genTrees(scop.tre.c85d5, maxmoves=3, N = 200, filebase = 'scop.nni.c85d5', perms = c(length(nni(scop.tre.c85d5)), 150, 150)) scop.nni.c90d5 <- genTrees(scop.tre.c90d5, maxmoves=3, N = 200, filebase = 'scop.nni.c90d5', perms = c(length(nni(scop.tre.c90d5)), 150, 150)) %%R # then, generate datasets for each of your treesets: scop.rad.c85d2 <- read.pyRAD('../R1.pyRAD/analysis_pyrad/_c85d2/outfiles/c85d2m4p3.loci') scop.rad.c85d5 <- read.pyRAD('../R1.pyRAD/analysis_pyrad/_c85d5/outfiles/c85d5m4p3.loci') scop.rad.c90d5 <- read.pyRAD('../R1.pyRAD/analysis_pyrad/_c90d5/outfiles/c90d5m4p3.loci') gen.RAD.loci.datasets(scop.rad.c85d2, scop.nni.c85d2, fileBase = 'c85d2', cores = 10, taxa = scop.nni.c85d2[[1]]$tip.label) gen.RAD.loci.datasets(scop.rad.c85d5, scop.nni.c85d5, fileBase = 'c85d5', cores = 10, taxa = scop.nni.c85d2[[1]]$tip.label) gen.RAD.loci.datasets(scop.rad.c90d5, scop.nni.c90d5, fileBase = 'c90d5', cores = 10, taxa = scop.nni.c85d2[[1]]$tip.label) %%R # Each resulting dataset will have a shell script associated with it in the folder numbered '.0', and this should be executed # outside of R to get the RAxML output needed for the next steps of analysis. You can close or open R as you see fit, but if # you close it, save the workspace (it will make your life easier in the next steps, and I have assumed in the next lines that # you are working in the still-open workspace, or loaded the saved workspace and re-attached RADami). Read in the likelihood files, # ranking the trees by likelihood: scop.matched.c85d5 <- match.lnL.to.trees('c85d5.0') scop.matched.c85d2 <- match.lnL.to.trees('c85d2.0') scop.matched.c90d5 <- match.lnL.to.trees('c90d5.0') # Then, plot them: plot(scop.matched.c85d2, fileprefix='c85d2', panels = c('bestMat', 'worstMat', 'bestMat'), ci = rep(0.95, 3), regression = c(F, F, T), lnL.break = c(-1E10, -1E10, -374000), pch = 3) plot(scop.matched.c90d5, fileprefix='c90d5', panels = c('bestMat', 'worstMat', 'bestMat'), ci = rep(0.95, 3), regression = c(F, F, T), lnL.break = c(-1E10, -1E10, -253350), pch = 3) plot(scop.matched.c85d5, fileprefix='c85d5', panels = c('bestMat', 'worstMat'), ci = rep(0.95, 2), regression = c(T, F), lnL.break = c(-1E10, -1E10), pch = 3) # and look at the outliers: pdf('c90d5.trees.pdf', width = 8.5, height = 11) layout(matrix(1:6, 3, 2)) for(i in c(1, which(colSums(scop.ranks.c90d5$bestMat) > 59))) { plot(scop.nni.c90d5[[i]]) title(main = paste('c90d5, tree',i)) } dev.off()