!date #allows plots to be shown inline %pylab inline #Setting Working Directory wd="/Users/Mackenzie/Desktop/FISH546/wd" #Setting directory of Blast Databases !!! make sure you have last '/' dbd="/Users/Mackenzie/Desktop/FISH546/db/" #Database name dbn="spdb" #Blast algorithim complete path ba="/Users/Shared/Apps/ncbi-blast-2.2.29\+/bin/blastx" #Location of SQLShare python tools: you can empty ("") if tools are in PATH !!! make sure you have last '/' spd="/Users/Mackenzie/sqlshare-pythonclient/tools/" cd {wd} #for some reason max hsp produced error and removed !{ba} -query query.fa -db {dbd}{dbn} -out {dbn}_blast_out.tab -evalue 1E-50 -num_threads 4 -max_target_seqs 1 -outfmt 6 !head -1 {dbn}_blast_out.tab #Translate pipes to tab so SPID is in separate column for Joining !tr '|' "\t" <{dbn}_blast_out.tab> {dbn}_blast_out2.tab !head -1 {dbn}_blast_out2.tab #Uploads formatted blast table to SQLshare; currently has generic name and meant to be temporary: Warning will overwrite. !python {spd}singleupload.py -d scratchblast_out {dbn}_blast_out2.tab !python {spd}fetchdata.py -s "SELECT * FROM [mgavery@washington.edu].[scratchblast_out]blast Left Join [sr320@washington.edu].[uniprot-reviewed_wGO_010714]unp ON blast.Column3 = unp.Entry Left Join [sr320@washington.edu].[SPID and GO Numbers]go ON unp.Entry = go.SPID Left Join [sr320@washington.edu].[GO_to_GOslim]slim ON slim.GO_id = go.GOID" -f tsv -o {dbn}_join2goslim.txt !head -2 {dbn}_join2goslim.txt !python {spd}singleupload.py -d scratchjoin_slim {dbn}_join2goslim.txt #Sets GO aspect !python {spd}fetchdata.py -s "SELECT Distinct Column1 as query, Column3 as SPID, GOSlim_bin FROM [mgavery@washington.edu].[scratchjoin_slim] Where aspect = 'P'" -f tsv -o justslim.txt !head justslim.txt from pandas import * jslim = read_table("justslim.txt", # name of the data file #sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? jslim.groupby('GOSlim_bin').query.count().plot(kind='bar') !say "hash tag winning" #could also upload again to get a simple table #could be done in pandas #!python {spd}singleupload.py -d scratchpie justslim.txt #fetching data grouped by GObin #!python {spd}fetchdata.py -s "SELECT GOSlim_bin, COUNT(GOSlim_bin) as termcount from [sr320@washington.edu].[scratchpie] Group by GOSlim_bin" -f tsv -o justpie.txt