!blastx \ -query /Volumes/web/scaphapoda/Grace/Transcriptomes/M_donacium_fasta.fa \ -db /Volumes/Data/blast_db/uniprot_sprot \ -max_target_seqs 1 \ -max_hsps 1 \ -outfmt 6 \ -num_threads 16 \ -out /Volumes/web/scaphapoda/Grace/mesodesma_blastx_uniprot_hummingbird.tab !blastx !tail /Volumes/web/scaphapoda/Grace/Transcriptomes/M_donacium_fasta.fa !fgrep -c ">" /Volumes/web/scaphapoda/Grace/Transcriptomes/M_donacium_fasta.fa !tail /Volumes/web/scaphapoda/Grace/mesodesma_blastx_uniprot_hummingbird.tab time !wc -l /Volumes/web/scaphapoda/Grace/mesodesma_blastx_uniprot_hummingbird.tab #finished!! !head /Volumes/web/scaphapoda/Grace/meodesma_blastx_uniprot_hummingbirdSPID.tab #translate pipes to tabs !tr '|' "\t" /Volumes/web/scaphapoda/Grace/meodesma_blastx_uniprot_hummingbirdSPID2.tab !head /Volumes/web/scaphapoda/Grace/meodesma_blastx_uniprot_hummingbirdSPID2.tab #translate commas to tabs !tr ',' "\t" /Volumes/web/scaphapoda/Grace/meodesma_blastx_uniprot_hummingbirdSPID3.tab !head /Volumes/web/scaphapoda/Grace/meodesma_blastx_uniprot_hummingbirdSPID3.tab #run in SQL share against SPID and GO Numbers and GO_to_GOslim #"where aspect like 'P'" narrows information down to proteins that are involved in biological processes #"Column1, term, GOSlim_bin, aspect" select specific columns of information !head /Volumes/web/scaphapoda/Grace/mesodesma_blastx_uniprot_hummingbird_GOSlimterms.tab #to get unique terms !awk '{print $4}' /Volumes/web/scaphapoda/Grace/mesodesma_blastx_uniprot_hummingbird_GOSlimterms.tab | sort | uniq #grep all terms pertaining to reproduction/sex-determination/etc !egrep -wi --color 'aldosterone|female|gametogenesis|genitalia|germ|germ-line|germarium-derived|germinal|germination|hormone|juvenile|male|mating-type|oocyte|ovulation|penile|postnatal|prostate|prostatic|reproduction|vulval' /Volumes/web/scaphapoda/Grace/mesodesma_blastx_uniprot_hummingbird_GOSlimterms.tab #want to make chart separating aspects of reproduction #not sure how to do that from pandas import * jslim = read_table("/Volumes/web/scaphapoda/Grace/mesodesma_blastx_uniprot_hummingbird_GOSlimterms.tab", # name of the data file #sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? jslim.groupby('GOSlim_bin').Column1.count().plot(kind='bar')