#allows plots to be shown inline %pylab inline #Setting Working Directory wd="/Volumes/web/whale/fish546/qpx_go_val" #Setting directory of Blast Databases dbd="/Volumes/Bay3/Software/ncbi-blast-2.2.29\+/db/" #Database name dbn="uniprot_sprot_r2013_12" #Blast algorithim ba="blastx" #Location of SQLShare python tools: you can empty ("") if tools are in PATH spd="/Users/sr320/sqlshare-pythonclient/tools/" cd {wd} !{ba} -query query.fa -db {dbd}{dbn} -out {dbn}_{ba}_out.tab -evalue 1E-50 -num_threads 4 -max_hsps_per_subject 1 -max_target_seqs 1 -outfmt 6 !head -1 {dbn}_{ba}_out.tab #Translate pipes to tab so SPID is in separate column for Joining !tr '|' "\t" <{dbn}_{ba}_out.tab> {dbn}_{ba}_out2.tab !head -1 {dbn}_{ba}_out2.tab #Uploads formatted blast table to SQLshare; currently has generic name and meant to be temporary: Warning will overwrite. !python {spd}singleupload.py -d scratchblast_out {dbn}_{ba}_out2.tab !python {spd}fetchdata.py -s "SELECT * FROM [sr320@washington.edu].[scratchblast_out]blast Left Join [sr320@washington.edu].[uniprot-reviewed_wGO_010714]unp ON blast.Column3 = unp.Entry Left Join [sr320@washington.edu].[SPID and GO Numbers]go ON unp.Entry = go.SPID Left Join [sr320@washington.edu].[GO_to_GOslim]slim ON slim.GO_id = go.GOID" -f tsv -o {dbn}_join2goslim.txt !head -2 {dbn}_join2goslim.txt !python {spd}singleupload.py -d scratchjoin_slim {dbn}_join2goslim.txt #Sets GO aspect !python {spd}fetchdata.py -s "SELECT Distinct Column1 as query, Column3 as SPID, GOSlim_bin FROM [sr320@washington.edu].[scratchjoin_slim] Where aspect = 'P'" -f tsv -o justslim.txt !head justslim.txt from pandas import * jslim = read_table("justslim.txt", # name of the data file #sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? jslim.groupby('GOSlim_bin').query.count().plot(kind='bar') !python {spd}singleupload.py -d scratchjoin_slim {dbn}_join2goslim.txt #Sets GO aspect !python {spd}fetchdata.py -s "SELECT Distinct Column1 as query, Column3 as SPID, GOSlim_bin FROM [sr320@washington.edu].[scratchjoin_slim] Where aspect = 'P'" -f tsv -o justslim.txt !head justslim.txt #from pandas import * jslim = read_table("justslim.txt", # name of the data file #sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? jslim.groupby('GOSlim_bin').query.count().plot(kind='bar') !say "hash tag winning" #could also upload again to get a simple table #could be done in pandas #!python {spd}singleupload.py -d scratchpie justslim.txt #fetching data grouped by GObin #!python {spd}fetchdata.py -s "SELECT GOSlim_bin, COUNT(GOSlim_bin) as termcount from [sr320@washington.edu].[scratchpie] Group by GOSlim_bin" -f tsv -o justpie.txt