# Annotation workflow (IPython / Jupyter notebook cells):
#   1. blastx a transcriptome against the uniprot_sprot BLAST database
#   2. convert the blast output to a tab-separated file and upload it to SQLShare
#   3. join the hits to protein names and GO / GO-slim terms in SQLShare
#   4. grep reproduction-related terms out of the joined table
#
# Lines starting with "!" are run in the system shell. Inside "!" and "%"
# lines, IPython replaces {name} with the value of the Python variable `name`.

# Make a code for the directory you want to use.
wd = "path to directory here"
dircode = "code-name for directory here"

# Names of the intermediate files. Set them once here; every command below
# picks them up through {blast_out} / {sql_tab}.
blast_out = "blast_file_name.tab"    # raw blastx output (tabular, -outfmt 6)
sql_tab = "new_file_name_sql.tab"    # same table with "|" converted to tabs

# Terms pertaining to reproduction, as an extended-regex alternation.
repro_terms = "male|female|genitalia|gonad|ovarian|reproduction|estrogen|testosterone|gametogenesis|germination|ovulation"

# Move into the working directory; check that the reported path is the
# directory you meant to use.
%cd {wd}

# Blasting:
# Be sure to have a transcriptome query fasta file named "query.fa" in your
# directory.
!blastx \
-query query.fa \
-db /Volumes/Data/blast_db/uniprot_sprot \
-max_target_seqs 1 \
-outfmt 6 \
-num_threads 8 \
-out "{blast_out}"

# Count the lines of the blast output, i.e. how many sequences were matched.
!wc -l "{blast_out}"

# Change pipes to tabs so that the blast file can be joined in SQLShare.
!tr '|' '\t' < "{blast_out}" > "{sql_tab}"

# Check to make sure the columns are separated by tabs.
!head "{sql_tab}"

# Upload the blast file that you just separated by tabs into SQLShare.
!python /Applications/sqlshare-pythonclient-master/tools/singleupload.py \
-d "{dircode}_uniprot" \
"{sql_tab}"

# "Joining" in SQLShare.
# In the "-s" row, change "{dircode}" to the actual code that you chose.
# This is the only cell where you have to change "{dircode}" to the actual
# code name.
!python /Applications/sqlshare-pythonclient-master/tools/fetchdata.py \
-s "SELECT Column1, term, GOSlim_bin, aspect, ProteinName FROM [graceac9@washington.edu].[{dircode}_uniprot]{dircode} left join [samwhite@washington.edu].[UniprotProtNamesReviewed_yes20130610]sp on {dircode}.Column3=sp.SPID left join [sr320@washington.edu].[SPID and GO Numbers]go on {dircode}.Column3=go.SPID left join [sr320@washington.edu].[GO_to_GOslim]slim on go.GOID=slim.GO_id where aspect like 'P'" \
-f tsv \
-o "{dircode}_descriptions.txt"

# Check that the SQLShare query worked and is readable.
# Should have column titles: "Column1, term, GOSlim_bin, aspect, ProteinName".
# (No need to change "{dircode}" to the actual code here.)
!head "{dircode}_descriptions.txt"

# Grep-ing terms pertaining to reproduction out of the SQLShare file.
# Should have a list of proteins show up with the terms listed above in red.
!grep -E --color "{repro_terms}" "{dircode}_descriptions.txt"

# NOTE(review): the original command ended in " / {dircode}_reprot.txt", which
# made grep treat "/" and a not-yet-existing file as extra inputs. Assumed the
# intent was to save the matching rows to that file -- confirm, and confirm the
# file name ("reprot") is the one wanted.
!grep -E "{repro_terms}" "{dircode}_descriptions.txt" > "{dircode}_reprot.txt"