# Working directory for this transcriptome, and the short code used as a
# prefix for the files and tables derived from it.
wd = "/Volumes/web/scaphapoda/Grace/Transcriptomes/rphilippinarum"
dircode = "rp"

# Enter the working directory *before* downloading, so the FASTA is saved in
# the same directory where the later blastx step reads it by relative path.
%cd {wd}
%pwd

# Fetch the transcriptome FASTA. --fail makes curl exit with an error on an
# HTTP failure instead of saving the server's error page as Ruphibase.fa.
!curl --fail -O http://eagle.fish.washington.edu/cnidarian/Ruphibase.fa

%ls

# Show the first lines of the download.
!head Ruphibase.fa

# Count the lines containing ">" (FASTA header lines). grep -F is the
# current spelling of the obsolescent fgrep.
!grep -F -c ">" Ruphibase.fa

# Run blastx with Ruphibase.fa as the query against the uniprot_sprot BLAST
# database. Output is tabular (-outfmt 6), capped with -max_target_seqs 1 and
# -max_hsps 1, using 8 threads; results go to blast_sprot.tab.
!blastx \
-db /Volumes/Data/blast_db/uniprot_sprot \
-query Ruphibase.fa \
-out blast_sprot.tab \
-outfmt 6 \
-max_target_seqs 1 -max_hsps 1 \
-num_threads 8

# Number of lines in the BLAST result table.
!wc -l blast_sprot.tab

# Turn every '|' in the BLAST table into a tab and write the result to
# blast_sprot_sql.tab (input and output are plain shell redirections).
!tr '|' '\t' < blast_sprot.tab > blast_sprot_sql.tab

# Preview the converted table.
!head blast_sprot_sql.tab

# Upload the tab-delimited BLAST table to SQLShare as "<dircode>_uniprot".
!python /Applications/sqlshare-pythonclient-master/tools/singleupload.py \
-d {dircode}_uniprot \
blast_sprot_sql.tab

# Join the uploaded table to the protein-name, GO and GO-slim tables and keep
# rows whose aspect is 'P'; save the result as "<dircode>_descriptions.txt".
# The table name in the query is built from dircode so it always refers to
# the dataset uploaded just above (it was previously hard-coded to
# rp_uniprot). The "rp" after the table name is only a SQL alias.
# NOTE(review): the owner account in the query is still hard-coded — confirm
# it matches the account used for the upload.
!python /Applications/sqlshare-pythonclient-master/tools/fetchdata.py \
-s "SELECT Column1, term, GOSlim_bin, aspect, ProteinName FROM [graceac9@washington.edu].[{dircode}_uniprot]rp left join [samwhite@washington.edu].[UniprotProtNamesReviewed_yes20130610]sp on rp.Column3=sp.SPID left join [sr320@washington.edu].[SPID and GO Numbers]go on rp.Column3=go.SPID left join [sr320@washington.edu].[GO_to_GOslim]slim on go.GOID=slim.GO_id where aspect like 'P'" \
-f tsv \
-o {dircode}_descriptions.txt

# Preview the fetched annotation table.
!head {dircode}_descriptions.txt

pylab inline

from pandas import *

# Load the annotation table into a DataFrame. The file name is built from
# dircode so it matches the "<dircode>_descriptions.txt" file written by the
# fetch step, rather than being hard-coded to the "rp" prefix.
gs = read_table('{}_descriptions.txt'.format(dircode))

# Count the non-null Column1 entries in each GOSlim_bin group, then draw the
# counts as a horizontal bar chart in yellow.
slim_counts = gs.groupby('GOSlim_bin')['Column1'].count()
slim_counts.plot(kind='barh', color=['y'])

# Keep the annotation lines that mention any reproduction-related keyword and
# save them to "<dircode>_reprot.txt". grep -E is the current spelling of the
# obsolescent egrep; --color is omitted because the output goes to a file.
# The match is an unanchored substring match anywhere on the line.
!grep -E "male|female|genitalia|gonad|ovarian|reproduction|estrogen|testosterone|gametogenesis|germination|ovulation|penile|prostate|vulval" < {dircode}_descriptions.txt > {dircode}_reprot.txt

# Show the first two matching lines.
!head -2 {dircode}_reprot.txt

# Tally how many times each distinct value of the second tab-separated field
# occurs among the matching lines.
!cut -f 2 {dircode}_reprot.txt | sort | uniq -c

# Total number of matching lines.
!wc -l {dircode}_reprot.txt