#!/usr/bin/env python # coding: utf-8 # ###Create C.gigas nucleotide BLAST database # In[8]: cd /Volumes/Data/blast_db/ # In[9]: get_ipython().system('makeblastdb -in /Volumes/Data/Sam/fish546_2015/gigasHSrnaSeq/raw_data/Crassostrea_gigas.GCA_000297895.1.24.fa -dbtype nucl -parse_seqids') # ####Confirm creation of BLAST db files # In[10]: get_ipython().system('ls Crassostrea_gigas.GCA_000297895.1.24.fa.*') # In[3]: cd /Users/Sam/scratch/ # ###Run blastn with C.gigas BLAST DB # In[8]: get_ipython().system('blastn -task blastn -query ../Owl/halfshell/EmmaBS400.fa -db Crassostrea_gigas.GCA_000297895.1.24.fa -outfmt 6 -max_target_seqs 1 -num_threads 16 -out 20150429_gigas_OA_blastn.tab') # ####Look at first 10 entries in blastn ouptut file # In[9]: get_ipython().system('head 20150429_gigas_OA_blastn.tab') # ####Look at last 10 entries in blastn ouptut file # In[10]: get_ipython().system('tail 20150429_gigas_OA_blastn.tab') # ####Count number of lines (i.e. blastn outputs) in output file # In[11]: get_ipython().system('wc -l 20150429_gigas_OA_blastn.tab') # ####Count the number of sequences in input FASTA file # In[12]: get_ipython().system("grep -c '>' ../Owl/halfshell/EmmaBS400.fa") # ####Check e-values # In[5]: get_ipython().system('cut -f 11 20150429_gigas_OA_blastn.tab | sort -n | head -50') # ####Count the number of e-values that are 0.0 # The code below cuts out the 11th column (-f 11) of our BLAST output file. That column is piped to grep. Grep finds the fixed string specified (-F) and those lines the matches that match the entire line (-x) and counts the lines (-c). # In[7]: get_ipython().system("cut -f 11 20150429_gigas_OA_blastn.tab | grep -cFx '0.0'")