# Report the directory the notebook session starts in.
%pwd
u'/Users/srlab'
# Download the Ruphibase FASTA into the current directory, keeping the
# remote file name (-O).
# --fail makes curl exit with an error on an HTTP failure instead of saving
# the server's error page under the name Ruphibase.fa.
!curl --fail -O http://eagle.fish.washington.edu/cnidarian/Ruphibase.fa
% Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 17.8M 100 17.8M 0 0 17.2M 0 0:00:01 0:00:01 --:--:-- 17.2M
# List the current directory to confirm the download is present.
%ls
Ruphibase.fa
# Preview the first ten lines of the FASTA file.
!head -n 10 Ruphibase.fa
>ruditapes2_lrc7040 ATTCAAATCTCTAACACTGATTCATACATGTAATAACTTGGCATACTATACATTATCAAC ATGTACTGTTACTTTCCTGTAATTGTTCAAAATATCTCTGGAATATTTTACACTTTATCT GTGGTTTTTTACAGTTTTTTTTTAATTGAAATAGTGATAACTTTGATTGAACATTCTTTT ATGTTTTAGCATCAAGATCTTCAAACTTGTAATACACACAATATCAATAACAAAATGTGA CAGTTTTATTTTCATTCATCATACACATCTTCCTTATCACATACATACTGACATAGATTC TGGTGTCATAAGACGGTCTGCATCTTGGTCAGGTATTTCAAATCTAAATTCATCTTCCAT TGCCATGATAACTTCTACAACATCTAAACTGTCCAATCCTAAATCATTCATAAAGTGTGA AGTCAATGACAGCTTTTCGGGATCAACTTTATCATAAAGTTGCAAAACGAGAATGACTCT TTCTTTAACATGAGATATTGTGAGAGCTGGCTTCTGACCATAATATCGAGGGTTTTGAAT
# dircode is the short prefix used in the dataset and output file names
# below; wd is the directory the rest of the notebook runs in.
dircode = 'rp'
wd = '/Volumes/web/scaphapoda/Grace/Transcriptomes/rphilippinarum'
%cd {wd}
/Volumes/web/scaphapoda/Grace/Transcriptomes/rphilippinarum
# Run blastx with Ruphibase.fa as the query against the uniprot_sprot
# database on 8 threads, keeping at most one target sequence
# (-max_target_seqs 1) and one HSP (-max_hsps 1), and write the
# -outfmt 6 table to blast_sprot.tab.
# NOTE(review): Ruphibase.fa was downloaded before the cd into the working
# directory; confirm a copy exists here before running.
!blastx \
-db /Volumes/Data/blast_db/uniprot_sprot \
-query Ruphibase.fa \
-out blast_sprot.tab \
-outfmt 6 \
-max_target_seqs 1 \
-max_hsps 1 \
-num_threads 8
Selenocysteine (U) at position 16 replaced by X
# Count the lines in the BLAST table, then write a copy in which every '|'
# is replaced by a tab and preview that copy.
!wc -l blast_sprot.tab
!tr '|' '\t' < blast_sprot.tab > blast_sprot_sql.tab
!head -n 10 blast_sprot_sql.tab
# Upload blast_sprot_sql.tab with the SQLShare client's singleupload.py,
# passing {dircode}_uniprot as the -d value (presumably the dataset name;
# confirm against the client's usage text).
!python /Applications/sqlshare-pythonclient-master/tools/singleupload.py -d {dircode}_uniprot blast_sprot_sql.tab
# Join the uploaded BLAST table to protein names, GO numbers and GO Slim
# bins, keep only rows whose aspect is 'P', and save the result as a TSV
# named {dircode}_descriptions.txt.
# The BLAST table is referenced as {dircode}_uniprot so the query reads the
# dataset uploaded in the previous step rather than a fixed dataset name.
# NOTE(review): the dataset owner [graceac9@washington.edu] is kept as
# written; confirm it matches the account used for the upload.
!python /Applications/sqlshare-pythonclient-master/tools/fetchdata.py \
-s "SELECT Column1, term, GOSlim_bin, aspect, ProteinName FROM [graceac9@washington.edu].[{dircode}_uniprot]md left join [samwhite@washington.edu].[UniprotProtNamesReviewed_yes20130610]sp on md.Column3=sp.SPID left join [sr320@washington.edu].[SPID and GO Numbers]go on md.Column3=go.SPID left join [sr320@washington.edu].[GO_to_GOslim]slim on go.GOID=slim.GO_id where aspect like 'P'" \
-f tsv \
-o {dircode}_descriptions.txt
# Preview the first ten lines of the fetched descriptions table.
!head -n 10 {dircode}_descriptions.txt
# Horizontal bar chart of the Column1 count for each GOSlim_bin value.
%pylab inline
# Wildcard import kept as-is because other cells may rely on the names it
# brings in.
from pandas import *
# Build the file name from dircode so the plot uses the descriptions file
# written by the fetchdata.py step, not a fixed file name.
gs = read_table(dircode + '_descriptions.txt')
gs.groupby('GOSlim_bin').Column1.count().plot(kind='barh', color=list('y'))
# Write every descriptions row that matches one of the reproduction-related
# keywords to {dircode}_reprot.txt. The output is redirected with '>' so the
# file is actually created for the commands that follow; --color is dropped
# because the matches go to a file, not the screen.
!grep -E "male|female|genitalia|gonad|ovarian|reproduction|estrogen|testosterone|gametogenesis|germination|ovulation|penile|prostate|vulval" {dircode}_descriptions.txt > {dircode}_reprot.txt
!head -n 2 {dircode}_reprot.txt
# Count how often each associated GO term (column 2) occurs.
!cut -f 2 {dircode}_reprot.txt | sort | uniq -c
# Total number of matching rows.
!wc -l {dircode}_reprot.txt