# Preview the first lines of the GFF file, then report its line, word and
# byte counts.
!head /Volumes/web/cnidarian/oyster.v9.gene_mRNA.gff

!wc /Volumes/web/cnidarian/oyster.v9.gene_mRNA.gff

#September 12, 2013


#export of transcript-based table
# Report the size of the exported table first, then preview its first lines.
!wc /Volumes/web/cnidarian/BiGoRNAseq_exon_exp_1.txt

!head /Volumes/web/cnidarian/BiGoRNAseq_exon_exp_1.txt

from pandas import *

# Load the exported table into a pandas DataFrame and plot it.
# pyplot is imported here because the bare `axis` name only exists after
# `%pylab inline`, which this notebook runs further down.
import matplotlib.pyplot as plt

# read data from data file into a pandas DataFrame
# (read_table defaults to tab-separated input)
BiGoRNAseq_exon = read_table(
    "/Volumes/web/cnidarian/BiGoRNAseq_exon_exp_1.txt",  # name of the data file
    # sep=",",  # what character separates each column?
    na_values=["", " "],  # what values should be considered "blank" values?
)

# Column types; rendered only when this is the last expression of a cell.
BiGoRNAseq_exon.dtypes

# print(...) with a single argument behaves the same on Python 2 and 3.
print(BiGoRNAseq_exon)

# Histogram of the RPKM column.
BiGoRNAseq_exon['RPKM'].hist(bins=100)
# Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function.
plt.axis([0, 1000, 0, 1000000])

# A DataFrame is not callable; the x/y point plot goes through DataFrame.plot.
BiGoRNAseq_exon.plot(x='Transcripts annotated', y='Transcripts length', style='o')

# Preview the first track CSV, then convert it to a tab-delimited file by
# translating every comma to a tab (written out with a .gff extension).
# NOTE: tr replaces every comma, including any inside quoted fields.
!head /Volumes/web/cnidarian/BiGoRNAseq_exon_exp_track.csv

!tr ',' "\t" </Volumes/web/cnidarian/BiGoRNAseq_exon_exp_track.csv> /Volumes/web/cnidarian/BiGoRNAseq_exon_exp_track.gff

# Check the converted file: first lines, then line/word/byte counts.
!head /Volumes/web/cnidarian/BiGoRNAseq_exon_exp_track.gff

!wc /Volumes/web/cnidarian/BiGoRNAseq_exon_exp_track.gff

# Same comma-to-tab conversion for the second track CSV, written out with an
# .igv extension.
!head /Volumes/web/cnidarian/BiGoRNAseq_exon_exp_track2.csv

!tr ',' "\t" </Volumes/web/cnidarian/BiGoRNAseq_exon_exp_track2.csv> /Volumes/web/cnidarian/BiGoRNAseq_exon_exp_track2.igv

#WHY not just run bedtools on Accepted hit bam?


# Preview the BiGo_RNAseq_genes file.
!head /Volumes/web/cnidarian/BiGo_RNAseq_genes

#September 11 2013

# Show the last lines of the unmapped-sequence FASTA file.
!tail /Volumes/web/cnidarian/BiGo_RNAseq_unmapped.fa

# blastx the unmapped sequences against the swissprot database: tabular
# output (-outfmt 6), e-value cutoff 1E-10, at most one target sequence per
# query, 2 threads.
!blastx -query /Volumes/web/cnidarian/BiGo_RNAseq_unmapped.fa -db /Volumes/web/whale/fish546/blast/db/swissprot -out /Volumes/web/cnidarian/BiGoRNAseq_unmapped_swissprot_blastout -outfmt 6 -evalue 1E-10 -max_target_seqs 1 -num_threads 2

# Preview the blastx output and report its line/word/byte counts.
!head /Volumes/web/cnidarian/BiGoRNAseq_unmapped_swissprot_blastout

!wc /Volumes/web/cnidarian/BiGoRNAseq_unmapped_swissprot_blastout

## Blast again and get taxonomy info
# The blastn command is kept as a comment (presumably it was run outside this
# notebook); its output format adds title and taxonomy columns to the
# standard tabular fields.
# ./blastn -query /Volumes/web/cnidarian/BiGo_RNAseq_unmapped.fa -db /Volumes/CLC_blastdatabases/nt -out /Volumes/web/cnidarian/BiGo_RNAseq_unmapped_nt_blastout_taxa2 -outfmt "6 std stitle staxids sscinames scomnames sblastnames" -evalue 1E-20 -max_target_seqs 1 -task blastn -num_threads 6

!head /Volumes/web/cnidarian/BiGo_RNAseq_unmapped_nt_blastout_taxa2

### Get Taxonomic Distribution
# SQLShare


# Preview the taxid CSV and report its line/word/byte counts.
!head /Volumes/web/cnidarian/BiGoRNAseq_taxid_unmapped.csv

!wc /Volumes/web/cnidarian/BiGoRNAseq_taxid_unmapped.csv

from pandas import *

# read data from data file into a pandas DataFrame
BiGoRNAsequm = read_csv(
    "http://eagle.fish.washington.edu/cnidarian/BiGoRNAseq_taxid_unmapped.csv",  # name of the data file
    sep=",",  # what character separates each column?
    na_values=["", " "],  # what values should be considered "blank" values?
)

# print(...) with a single argument behaves the same on Python 2 and 3.
print(BiGoRNAsequm)

# Histogram of the second column, which pandas auto-named 'Unnamed: 1'.
BiGoRNAsequm['Unnamed: 1'].hist(bins=50)
# Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function.
# plt.axis([0, 1, 0, 40])
# SQL query (not Python; presumably run in SQLShare), kept as a comment for
# reference:
#   SELECT Column1,
#     Column13
#   FROM [sr320@washington.edu].[BiGo_RNAseq_unmapped_nt_blastout_taxa2]
# @fu

# The bare `ls`, `cd` and `cp` commands in this section have no `!` or `%`
# prefix, so they rely on IPython automagic.
# List the tophat_071313 directory and preview its junctions.bed file.
ls /Volumes/web/cnidarian/tophat_071313

!head /Volumes/web/cnidarian/tophat_071313/junctions.bed

# Change into the samtools directory under the BSMAP 2.74 tree and list it.
cd /Volumes/Bay3/Software/BSMAP/bsmap-2.74/samtools

ls

# Call samtools with no arguments (before the copy below).
!samtools

# Copy the samtools binary into /usr/local/bin (presumably so that it is found
# on PATH), then call it again to check.
cp samtools /usr/local/bin

!samtools

# Count all alignment records in s_1.bam (-c prints only the count).
!samtools view -c /Volumes/web/cnidarian/tophat_071313/s_1.bam

#only mapped reads (-F 4 excludes records that have the "unmapped" flag set)
!samtools view -c -F 4 /Volumes/web/cnidarian/tophat_071313/s_1.bam

#unmapped reads (-f 4 keeps only records that have the "unmapped" flag set)
!samtools view -c -f 4 /Volumes/web/cnidarian/tophat_071313/s_1.bam

# Record counts for accepted_hits.bam and unmapped.bam.
!samtools view -c /Volumes/web/cnidarian/tophat_071313/accepted_hits.bam

!samtools view -c /Volumes/web/cnidarian/tophat_071313/unmapped.bam

# Flag summary statistics for each of the three BAM files.
!samtools flagstat /Volumes/web/cnidarian/tophat_071313/s_1.bam

!samtools flagstat /Volumes/web/cnidarian/tophat_071313/accepted_hits.bam

!samtools flagstat /Volumes/web/cnidarian/tophat_071313/unmapped.bam

#cufflinks failed


# Line/word/byte counts of the two FASTQ files.
!wc /Volumes/web/cnidarian/BiGo_RNAseq_fastx_qual.fastq

!wc /Volumes/web/cnidarian/BiGo_RNAseq_PreProcess.fastq

# Change the working directory (bare `cd` relies on IPython automagic).
cd /Volumes/web/cnidarian/

# Recursively (-r) download the FastQC analysis folder with iget.
!iget -r /iplant/home/sr320/analyses/FastQC_0.10.1__BiGoRNA-2014-03-24-10-33-44.916

# Run on filtered file.
# Still not great.
# New file.

# Preview the gene table.
!head /Volumes/web/cnidarian/BiGoRNA_genetable_clc

# Location of SQLShare python tools: you can set this to empty ("") if the
# tools are in PATH
spd="/Users/sr320/sqlshare-pythonclient/tools/"

# Run singleupload.py on the gene table; IPython expands {spd} to the Python
# variable above. -d is presumably the dataset name -- TODO confirm against
# the SQLShare client.
!python {spd}singleupload.py -d BiGoRNA_genetable_clc /Volumes/web/cnidarian/BiGoRNA_genetable_clc


%pylab inline

import numpy as np
import matplotlib.pyplot as plt

from pandas import *

# Load the gene table into a pandas DataFrame with read_table's defaults;
# no sep, na_values or header overrides are passed.
BiGoRNA = read_table("/Volumes/web/cnidarian/BiGoRNA_genetable_clc")

# Show the table (rendered only when this is the last expression of a cell).
BiGoRNA


# Plot the "Expression value" column through the Series' own .plot() method;
# `plot.BiGoRNA[...]` is an attribute lookup on the plot function and fails.
# NOTE(review): the axis limits kept below look like histogram limits -- if a
# histogram was intended, use BiGoRNA["Expression value"].hist() instead.
BiGoRNA["Expression value"].plot()
#Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function.
#plt.axis([0, 200, 0, 30000]);
#plt.title('Expression value');


# Histogram of "Total intron reads" in 20 bins, with the view limited to
# x in [0, 100] and y in [0, 2000].
plt.hist(BiGoRNA["Total intron reads"], bins=20)
plt.xlim(0, 100)
plt.ylim(0, 2000)
plt.show()


# Plot "Total intron reads" against "Expression value" on linear, log-log and
# semilog-x axes.
x = BiGoRNA["Expression value"]
y = BiGoRNA["Total intron reads"]

# Linear axes, blue circles.
plt.plot(x, y, 'bo')
plt.show()

# Each remaining plot gets its own figure and its own show(). Without a fresh
# figure, semilogx would draw onto the log-log axes and inherit their log
# y-scale.
# `fig` keeps its name for any later cell that reads it; note that it holds
# the list of line objects returned by pyplot, not a Figure.
plt.figure()
fig = plt.loglog(x, y, 'rs')
plt.show()

plt.figure()
fig = plt.semilogx(x, y, 'g^')
plt.show()