#Settings: fill in both values before running anything else
#dircode is the short code name that prefixes the dataset and output files named further down
dircode="code-name for directory here"
#wd is the path of the directory you want to work in
wd="path to directory here"
%cd {wd}
#moves into the working directory; confirm that the directory reported is the one you set in wd
#Blasting:
#be sure to have a transcriptome query fasta file named "query.fa" in your directory
#File names are derived from dircode so every step below refers to the same files.
#(A name written as "#some_name.tab" on a "!" line starts a shell comment, so the
#command would receive no file name at all.)
blast_out=dircode+"_blast.tab"
blast_sql=dircode+"_blast_sql.tab"
!blastx \
-query query.fa \
-db /Volumes/Data/blast_db/uniprot_sprot \
-max_target_seqs 1 \
-outfmt 6 \
-num_threads 8 \
-out "{blast_out}"
!wc -l "{blast_out}"
#counts the lines of the tabular blast output, i.e. how many hit rows were reported
!tr '|' "\t" < "{blast_out}" > "{blast_sql}"
#this changes pipes to tabs so that blast file can be joined in sql share
!head "{blast_sql}"
#check to make sure columns are separated by tabs
!python /Applications/sqlshare-pythonclient-master/tools/singleupload.py \
-d {dircode}_uniprot \
"{blast_sql}"
#Uploads the tab-separated blast file to SQL share as the dataset {dircode}_uniprot
#"Joining" SQL Share
#Runs fetchdata.py with the SQL query passed to -s; the -f tsv and -o options presumably
#select the output format and the output file ({dircode}_descriptions.txt is read back below).
#The query left-joins the [{dircode}_uniprot] dataset to three shared tables
#(on Column3=SPID and on GOID=GO_id) and keeps only rows where aspect is like 'P'.
!python /Applications/sqlshare-pythonclient-master/tools/fetchdata.py \
-s "SELECT Column1, term, GOSlim_bin, aspect, ProteinName FROM [graceac9@washington.edu].[{dircode}_uniprot]{dircode} left join [samwhite@washington.edu].[UniprotProtNamesReviewed_yes20130610]sp on {dircode}.Column3=sp.SPID left join [sr320@washington.edu].[SPID and GO Numbers]go on {dircode}.Column3=go.SPID left join [sr320@washington.edu].[GO_to_GOslim]slim on go.GOID=slim.GO_id where aspect like 'P'" \
-f tsv \
-o {dircode}_descriptions.txt
#In the "-s" row, replace every "{dircode}" with the actual code name that you chose.
#This is the only cell where "{dircode}" has to be replaced by hand.
!head {dircode}_descriptions.txt
#No need to replace "{dircode}" with your code name here.
#Checks that the SQL share query worked and the output is readable. It should have the column titles: "Column1, term, GOSlim_bin, aspect, ProteinName"
#Grep-ing terms pertaining to reproduction out of the SQL Share File
#The search terms are kept in one place so the display and the saved file always use the same pattern.
repro_terms="male|female|genitalia|gonad|ovarian|reproduction|estrogen|testosterone|gametogenesis|germination|ovulation"
!grep -E --color "{repro_terms}" {dircode}_descriptions.txt
#Should have a list of proteins show up with the terms listed above in red
!grep -E "{repro_terms}" {dircode}_descriptions.txt > {dircode}_reprot.txt
#Saves the same matching lines (without colour codes) to {dircode}_reprot.txt.
#A ">" is required for this: a "/" in its place makes grep search "/" and the output
#file as extra inputs instead of writing to the file.