# Show the directory the notebook is currently working in.
%pwd
u'/Users/srlab'
# Download the Ruphibase FASTA into the current directory; -O keeps the
# remote file name (Ruphibase.fa).
# -f makes curl exit non-zero on an HTTP error instead of saving the server's
# error page under the FASTA file name; -L follows redirects.
!curl -f -L -O http://eagle.fish.washington.edu/cnidarian/Ruphibase.fa
% Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 17.8M 100 17.8M 0 0 17.2M 0 0:00:01 0:00:01 --:--:-- 17.2M
# List the current directory to confirm the download is present.
%ls
Ruphibase.fa
# Preview the first 10 lines of the FASTA file.
!head -n 10 Ruphibase.fa
>ruditapes2_lrc7040 ATTCAAATCTCTAACACTGATTCATACATGTAATAACTTGGCATACTATACATTATCAAC ATGTACTGTTACTTTCCTGTAATTGTTCAAAATATCTCTGGAATATTTTACACTTTATCT GTGGTTTTTTACAGTTTTTTTTTAATTGAAATAGTGATAACTTTGATTGAACATTCTTTT ATGTTTTAGCATCAAGATCTTCAAACTTGTAATACACACAATATCAATAACAAAATGTGA CAGTTTTATTTTCATTCATCATACACATCTTCCTTATCACATACATACTGACATAGATTC TGGTGTCATAAGACGGTCTGCATCTTGGTCAGGTATTTCAAATCTAAATTCATCTTCCAT TGCCATGATAACTTCTACAACATCTAAACTGTCCAATCCTAAATCATTCATAAAGTGTGA AGTCAATGACAGCTTTTCGGGATCAACTTTATCATAAAGTTGCAAAACGAGAATGACTCT TTCTTTAACATGAGATATTGTGAGAGCTGGCTTCTGACCATAATATCGAGGGTTTTGAAT
# Count the lines containing a literal ">" (the FASTA header marker), which
# gives the number of sequences in the file.
!grep -F -c ">" Ruphibase.fa
32606
# dircode: short prefix reused by later cells for output file and table names.
# wd: directory in which the following cells run.
dircode = 'rp'
wd = '/Volumes/web/scaphapoda/Grace/Transcriptomes/rphilippinarum'
%cd {wd}
/Volumes/web/scaphapoda/Grace/Transcriptomes/rphilippinarum
# Run blastx with Ruphibase.fa as the query against the uniprot_sprot
# database, writing tabular output (-outfmt 6) to blast_sprot.tab, limited to
# one target sequence and one HSP per query, using 8 threads.
# NOTE(review): the query path is relative, but Ruphibase.fa was downloaded
# before the `cd {wd}` cell -- confirm a copy exists in the working directory.
!blastx \
  -db /Volumes/Data/blast_db/uniprot_sprot \
  -query Ruphibase.fa \
  -out blast_sprot.tab \
  -outfmt 6 \
  -max_hsps 1 \
  -max_target_seqs 1 \
  -num_threads 8
# Count the lines in the BLAST output table.
!wc -l blast_sprot.tab
19506 blast_sprot.tab
# Turn every "|" into a tab so the pipe-delimited subject ID
# (sp|ACCESSION|NAME) is split into separate tab-delimited columns.
!tr '|' '\t' < blast_sprot.tab > blast_sprot_sql.tab
# Preview the first 10 rows of the reformatted table.
!head -n 10 blast_sprot_sql.tab
ruditapes2_lrc7040 sp P52505 ACPM_BOVIN 65.82 79 27 0 528 292 66 144 8e-30 114 ruditapes2_c3688 sp P02637 SCP_MIZYE 33.03 109 71 2 95 418 1 108 3e-12 65.1 ruditapes2_c1400 sp Q86UP6 CUZD1_HUMAN 24.44 135 101 1 76 480 392 525 3e-10 63.5 ruditapes2_c3682 sp Q9D2R8 RT33_MOUSE 50.00 88 43 1 557 294 5 91 9e-19 83.6 ruditapes2_c4432 sp Q8VEM8 MPCP_MOUSE 81.82 121 22 0 62 424 237 357 7e-55 184 ruditapes2_c3421 sp O58530 RGYR_PYRHO 40.91 44 20 2 81 212 1339 1376 0.15 34.7 ruditapes2_c3350 sp Q4KLV7 F50AB_XENLA 31.03 58 39 1 888 715 258 314 3.8 32.7 ruditapes2_c3356 sp Q25379 ACT3_LYTPI 83.72 86 7 2 106 351 90 172 2e-40 144 ruditapes2_c3354 sp Q9BW30 TPPP3_HUMAN 41.40 157 74 5 157 621 30 170 5e-18 84.3 ruditapes2_c3427 sp B7IF15 DAPH_THEAB 33.33 66 43 1 476 670 38 103 7.8 31.2
# Push the reformatted BLAST table through the SQLShare client's
# singleupload.py, passing "<dircode>_uniprot" as the -d argument.
!python /Applications/sqlshare-pythonclient-master/tools/singleupload.py -d {dircode}_uniprot blast_sprot_sql.tab
processing chunk line 0 to 19506 (0.00798296928406 s elapsed) pushing blast_sprot_sql.tab... parsing 88DEA829... finished rp_uniprot
# Query SQLShare and save the result as <dircode>_descriptions.txt (TSV).
# The uploaded BLAST table is left-joined on its Column3 value (the accession
# split out of the subject ID) to the protein-name table, the SPID->GO table
# and the GO->GOslim table, keeping only rows whose aspect matches 'P'.
# The source table name is built from {dircode} so it always matches the
# dataset name used by the upload cell instead of a hard-coded "rp_uniprot".
# NOTE(review): the owner account (graceac9@washington.edu) is still
# hard-coded -- confirm it is the account the upload cell writes to.
!python /Applications/sqlshare-pythonclient-master/tools/fetchdata.py \
-s "SELECT Column1, term, GOSlim_bin, aspect, ProteinName FROM [graceac9@washington.edu].[{dircode}_uniprot]rp left join [samwhite@washington.edu].[UniprotProtNamesReviewed_yes20130610]sp on rp.Column3=sp.SPID left join [sr320@washington.edu].[SPID and GO Numbers]go on rp.Column3=go.SPID left join [sr320@washington.edu].[GO_to_GOslim]slim on go.GOID=slim.GO_id where aspect like 'P'" \
-f tsv \
-o {dircode}_descriptions.txt
# Preview the first 10 rows of the joined annotation table.
!head -n 10 {dircode}_descriptions.txt
# Enable inline plotting. The explicit "%" prefix is required here: the bare
# form `pylab inline` was parsed as Python and raised a SyntaxError.
%pylab inline
# Load the annotation table written by the fetch cell and draw a horizontal
# bar chart of the number of non-null Column1 entries per GOSlim_bin.
# The file name is derived from `dircode` so it follows the same prefix as the
# cells that create the file, rather than a hard-coded "rp".
from pandas import *
gs = read_table('{0}_descriptions.txt'.format(dircode))
gs.groupby('GOSlim_bin').Column1.count().plot(kind='barh', color=list('y'))
<matplotlib.axes.AxesSubplot at 0x10ece1ad0>
# Keep only the annotation rows that mention a reproduction-related keyword
# and write them to <dircode>_reprot.txt.
# Matching is case-sensitive, unanchored and applies to the whole line, so any
# column can trigger a match and "male" also matches inside longer words
# (including "female").
# NOTE(review): if only the GO term column is meant to be searched, this
# filter is broader than intended -- confirm.
!grep -E --color 'male|female|genitalia|gonad|ovarian|reproduction|estrogen|testosterone|gametogenesis|germination|ovulation|penile|prostate|vulval' < {dircode}_descriptions.txt > {dircode}_reprot.txt
# Show the first two rows of the filtered table.
!head -n 2 {dircode}_reprot.txt
# Tally how many filtered rows carry each distinct value of column 2
# (`term` in the SELECT order of the fetch query).
!cut -f2 {dircode}_reprot.txt | sort | uniq -c
# Count the rows that survived the reproduction keyword filter.
!wc -l {dircode}_reprot.txt
617 rp_reprot.txt