!head /Volumes/web/scaphapoda/Grace/Transcriptomes/mercenaria/query.fa
>Mmercenaria_Contig_1 AAGAGTGACTGGTACCACCTGTGTACTACAATGGTTATTTGATACAACTAAATGTAAGCGGTACCACCATGTATTACAATGTGAAATTAGTATCAATAAGTGTGGCTGGTACCTTTATATATTACAGGTGCTGTTATGTTTGACAGGAATACTGATGTGAGATAGTTACTTCCATACTATGTGTAACCTACGGTCCGGCACGTTGAATGGTGGGGTG >Mmercenaria_Contig_2 ACAGCTGTCTGATTACTTATACAAAGAACACGGGTTTAAAGCAGAAATGATTGATACTCTGTACAACTATGCCAAGTTTCAGTATGAATGTGGTAATTATTCTGCAGCAGCTGAATATCTCTACTTTGTTAGAATCCTGCTACCACCAAATGACAGAAATTACTTGAATGCATTATGGGGGAAGTTAGCTTCAGAGATTCTCATGCAAACGACCAGTG >Mmercenaria_Contig_3 TGACGAGACTCTCAAGTTCATTGCAAGAAAGTTTACTGATGCAAAAATGTAATTTATCTCAGTGAAGGTCTATAGGAGTATCCCAGCTTCTTTTGAGGAGTCAACAATTTTCATAGCTGTAGTTAGATGCCAGTCTTCTGTAGAAACTACCCAGGATTCCATTATTTCTTCTGATTGATCAGTGGTTGCCTAGCAATGAAGTGTTTCACAAAAAGCT >Mmercenaria_Contig_4 TATTTTGAGCATAACTTATAACCCGTTCAACGTTCAAGGTATTGACTTCTGACTGGGAATATAAGTAGGTGGCAATGAAACCATGTGCAGATCGGTAGGTCAAATGTTAATGTAGTCAGATCTAACTGTCATATTTCATGGTCCGTACTCGACCTCCTTTAATCTAAAACTTTTGACGTATATTGCACCGCTTTGCGGAGATCTTGTTTGATTATAATTTGACTTTGTTATGGCTTTCACTAGTTT >Mmercenaria_Contig_5 TAAAAGAACGCATACACCCATCAGTTTTGAAACTATTTTAGTTAATTTCATTATACAATTCAGAGTAGGTGTCAAAATTTCATGATAAACTGTCAGAACTGGTAGAAGTCTCCGTGATCGACCATTTTACATTTATTTCCCTAACAGATTGTTGTTTATCTCTACGACATCGTTAAATTAAATAGCAACATTTTAAGAACATCTCCGACAATCA
!fgrep -c ">" /Volumes/web/scaphapoda/Grace/Transcriptomes/mercenaria/query.fa
8482
wd="/Volumes/web/scaphapoda/Grace/Transcriptomes/mercenaria"
dircode="me"
cd {wd}
/Volumes/web/scaphapoda/Grace/Transcriptomes/mercenaria
!blastx \
-query query.fa \
-db /Volumes/Data/blast_db/uniprot_sprot \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6 \
-num_threads 8 \
-out blast_sprot.tab
!wc -l blast_sprot.tab
7174 blast_sprot.tab
!tr '|' "\t" <blast_sprot.tab> blast_sprot_sql.tab
!head blast_sprot_sql.tab
Mmercenaria_Contig_1 sp P06538 DPOL_ADE12 26.09 46 34 0 141 4 100 145 6.2 28.5 Mmercenaria_Contig_2 sp Q6DRI1 EI3EA_DANRE 75.00 68 17 0 5 208 114 181 2e-29 112 Mmercenaria_Contig_3 sp O94823 AT10B_HUMAN 61.11 18 7 0 162 215 99 116 2.2 29.6 Mmercenaria_Contig_5 sp P0A5H8 EFPP_MYCTU 63.16 19 7 0 117 61 20 38 0.64 31.2 Mmercenaria_Contig_6 sp Q9WU60 ATRN_MOUSE 28.85 52 33 1 168 13 808 855 0.12 33.9 Mmercenaria_Contig_8 sp P18547 VNCS_PAVPN 50.00 22 11 0 111 176 362 383 0.85 30.8 Mmercenaria_Contig_9 sp A8WGF4 IF122_XENTR 67.16 67 22 0 1 201 894 960 6e-24 99.4 Mmercenaria_Contig_10 sp Q4QK86 MUKB_HAEI8 29.79 47 33 0 16 156 262 308 1.6 30.0 Mmercenaria_Contig_11 sp Q0AQ76 THIG_MARMM 34.09 44 27 1 210 79 84 125 3.4 28.9 Mmercenaria_Contig_12 sp P15106 GLNA_STRCO 39.29 28 17 0 40 123 124 151 0.84 30.8
!python /Applications/sqlshare-pythonclient-master/tools/singleupload.py \
-d {dircode}_uniprot \
blast_sprot_sql.tab
processing chunk line 0 to 7174 (0.00320911407471 s elapsed) pushing blast_sprot_sql.tab... parsing DDA8388F... finished me_uniprot
!python /Applications/sqlshare-pythonclient-master/tools/fetchdata.py \
-s "SELECT Column1, term, GOSlim_bin, aspect, ProteinName FROM [graceac9@washington.edu].[me_uniprot]me left join [samwhite@washington.edu].[UniprotProtNamesReviewed_yes20130610]sp on me.Column3=sp.SPID left join [sr320@washington.edu].[SPID and GO Numbers]go on me.Column3=go.SPID left join [sr320@washington.edu].[GO_to_GOslim]slim on go.GOID=slim.GO_id where aspect like 'P'" \
-f tsv \
-o {dircode}_descriptions.txt
!head {dircode}_descriptions.txt
pylab inline
Populating the interactive namespace from numpy and matplotlib
# Star import kept as-is so later cells that use bare pandas names keep working.
from pandas import *

# Load the annotation table written by the fetch step. The file name is derived
# from `dircode` so it follows the same naming as the file that step creates
# rather than being pinned to the "me" prefix.
gs = read_table(dircode + '_descriptions.txt')

# Horizontal bar chart of how many rows fall into each GOSlim_bin category.
gs.groupby('GOSlim_bin').Column1.count().plot(kind='barh', color=['y'])
<matplotlib.axes.AxesSubplot at 0x10e13b7d0>
!egrep --color "male|female|genitalia|gonad|ovarian|reproduction|estrogen|testosterone|gametogenesis|germination|ovulation|penile|prostate|vulval" {dircode}_descriptions.txt / {dircode}_reprot.txt
!head -2 {dircode}_reprot.txt
#counting list of associated GO terms
!cut -f 2 {dircode}_reprot.txt | sort | uniq -c
!wc -l {dircode}_reprot.txt
288 me_reprot.txt