# IPython notebook: annotate the Ruphibase transcriptome against Swiss-Prot
# with blastx, join the hits to protein names / GO Slim terms via SQLShare,
# plot GO Slim bin counts, and pull out reproduction-related annotations.
#
# Lines starting with "!" are shell escapes; {name} inside them is replaced
# by IPython with the value of the Python variable `name`.

%pwd

# Download the transcriptome FASTA into the current directory.
# NOTE(review): this runs before the cd into `wd` below, while blastx later
# reads Ruphibase.fa by relative path -- confirm the notebook is started
# from `wd`, otherwise the query file will not be found.
!curl -O http://eagle.fish.washington.edu/cnidarian/Ruphibase.fa
!ls
!head Ruphibase.fa

# Count lines containing ">" (FASTA header lines).
!fgrep -c ">" Ruphibase.fa

# Working directory and the short prefix used for every derived file/table.
wd = "/Volumes/web/scaphapoda/Grace/Transcriptomes/rphilippinarum"
dircode = "rp"
%cd {wd}

# blastx against the uniprot_sprot database: one target and one HSP per
# query, tabular output (-outfmt 6), 8 threads.
!blastx \
-query Ruphibase.fa \
-db /Volumes/Data/blast_db/uniprot_sprot \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6 \
-num_threads 8 \
-out blast_sprot.tab
!wc -l blast_sprot.tab

# Turn every "|" into a tab. tr only reads stdin and writes stdout, so the
# input and output files must be given as redirections; without them the
# command fails and blast_sprot_sql.tab is never created.
# Presumably this splits the pipe-delimited subject ID into separate columns
# (the query below joins on Column3 as the Swiss-Prot ID) -- TODO confirm.
!tr '|' "\t" < blast_sprot.tab > blast_sprot_sql.tab
!head blast_sprot_sql.tab

# Upload the reformatted hits to SQLShare as the dataset <dircode>_uniprot.
!python /Applications/sqlshare-pythonclient-master/tools/singleupload.py \
-d {dircode}_uniprot \
blast_sprot_sql.tab

# Join the uploaded hits to protein names, GO IDs and GO Slim bins, keeping
# rows whose aspect is like 'P', and save the result as TSV.
# NOTE(review): the table name in the query is hard-coded to rp_uniprot; it
# only matches the upload above while dircode == "rp".
!python /Applications/sqlshare-pythonclient-master/tools/fetchdata.py \
-s "SELECT Column1, term, GOSlim_bin, aspect, ProteinName FROM [graceac9@washington.edu].[rp_uniprot]rp left join [samwhite@washington.edu].[UniprotProtNamesReviewed_yes20130610]sp on rp.Column3=sp.SPID left join [sr320@washington.edu].[SPID and GO Numbers]go on rp.Column3=go.SPID left join [sr320@washington.edu].[GO_to_GOslim]slim on go.GOID=slim.GO_id where aspect like 'P'" \
-f tsv \
-o {dircode}_descriptions.txt
!head {dircode}_descriptions.txt

%pylab inline
from pandas import *

# Horizontal bar chart of the number of Column1 entries per GO Slim bin.
# The file name is built from `dircode` so it tracks the fetch step above.
gs = read_table(dircode + '_descriptions.txt')
gs.groupby('GOSlim_bin').Column1.count().plot(kind='barh', color=list('y'))

# Keep only description rows that mention a reproduction-related keyword.
!egrep --color "male|female|genitalia|gonad|ovarian|reproduction|estrogen|testosterone|gametogenesis|germination|ovulation|penile|prostate|vulval" < {dircode}_descriptions.txt > {dircode}_reprot.txt
!head -2 {dircode}_reprot.txt

# Count how often each associated GO term (column 2) occurs.
!cut -f 2 {dircode}_reprot.txt | sort | uniq -c
!wc -l {dircode}_reprot.txt