# ie python fetchdata.py -d "[sr320@washington.edu].[BiGO_Methylation_oysterv9_GFF]​" -f tsv -o /Volumes/web/cnidarian/BiGO_Methylation10x_oysterv9.gff # running on commandline because cannot get to work in IPython python fetchdata.py -d "[sr320@washington.edu].[BiGo_methratio_GFF_boop]​" -f tsv -o /Volumes/web/cnidarian/BiGo_methratio_boop.gff #fetchdata failed #Dowloaded csv # should be same as !head /Volumes/web/cnidarian/BiGO_Methylation5x_oysterv9.gff !wc /Volumes/web/cnidarian/BiGO_Methylation5x_oysterv9.gff !head /Volumes/web/cnidarian/BiGo_methratio_boop.gff !wc /Volumes/web/cnidarian/BiGo_methratio_boop.gff from pandas import * # read data from data file into a pandas DataFrame BiGOboop = read_csv("http://eagle.fish.washington.edu/cnidarian/BiGo_methratio_GFF_boop.csv", # name of the data file sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? BiGOboop['score'].hist(bins=50); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 1, 0, 400000]) !head /Volumes/web/cnidarian/TJGR_oyster_v9_CGmotif.gff !fgrep -c "fuzznuc" /Volumes/web/cnidarian/TJGR_oyster_v9_CGmotif.gff !head /Volumes/web/cnidarian/TJGR_oyster_v9_CG.gff !fgrep -c "fuzznuc" /Volumes/web/cnidarian/TJGR_oyster_v9_CG.gff #ran before !head /Volumes/web/cnidarian/oyster_v9_CG_fuzznuc.output !fgrep -c "+ pattern: " /Volumes/web/cnidarian/oyster_v9_CG_fuzznuc.output !fgrep -c "nucleotide_motif" /Volumes/web/bivalvia/wholegenomefiles_MBDbsSeq_gill/gffs/TJGR_oyster_v9_CG.gff !intersectbed -a /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff -b /Volumes/web/cnidarian/TJGR_oyster_v9_CG.gff -c > /Volumes/web/cnidarian/TGR_intersectbed_CDS_v9_CGmotif.txt !intersectbed -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff -a /Volumes/web/cnidarian/TJGR_oyster_v9_CG.gff > /Volumes/web/cnidarian/TGR_CGmotif_intersect_exon.txt !head /Volumes/web/cnidarian/TGR_intersectbed_CDS_v9_CGmotif.txt !wc /Volumes/web/cnidarian/TGR_intersectbed_CDS_v9_CGmotif.txt cat /Volumes/web/cnidarian/TGR_intersectbed_CDS_v9_CGmotif.txt | awk -F"\t" '{ sum+=$10} END {print sum}' !head /Volumes/web/cnidarian/TGR_CGmotif_intersect_exon.txt !wc /Volumes/web/cnidarian/TGR_CGmotif_intersect_exon.txt ## CG intersect intron !intersectbed -a /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_intron.gff > /Volumes/web/cnidarian/TGR_intersectbed_intron_v9_CG.gff !wc /Volumes/web/cnidarian/TGR_intersectbed_intron_v9_CG.gff ## CG intersect TE !intersectbed -a /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE.gff > /Volumes/web/cnidarian/TGR_intersectbed_CG_TE.gff !wc /Volumes/web/cnidarian/TGR_intersectbed_CG_TE.gff ## CG intersect Promoter !intersectbed -a /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff > /Volumes/web/cnidarian/TGR_intersectbed_CG_prom.gff !wc /Volumes/web/cnidarian/TGR_intersectbed_CG_prom.gff !head /Volumes/web/cnidarian/TGR_intersectbed_CG_prom.gff ## CG intersect Promoter !intersectbed -a /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_COMP_gene_prom_TE.bed > /Volumes/web/cnidarian/TGR_intersectbed_CG_other.gff !wc /Volumes/web/cnidarian/TGR_intersectbed_CG_other.gff !head /Volumes/web/cnidarian/BiGo_methratio_mCG.csv !wc /Volumes/web/cnidarian/BiGo_methratio_mCG.csv !tr ',' "\t" /Volumes/web/cnidarian/BiGo_methratio_mCG.gff !head /Volumes/web/cnidarian/BiGo_methratio_mCG.gff !tail -n +2 /Volumes/web/cnidarian/BiGo_methratio_mCG.gff > /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff !head /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff ###mCG intersects with genome features !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff > /Volumes/web/cnidarian/TGR_mCG_intersect_exon.gff !wc /Volumes/web/cnidarian/TGR_mCG_intersect_exon.gff !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_intron.gff > /Volumes/web/cnidarian/TGR_mCG_intersect_intron.gff !wc /Volumes/web/cnidarian/TGR_mCG_intersect_intron.gff !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE.gff > /Volumes/web/cnidarian/TGR_mCG_intersect_TE.gff !wc /Volumes/web/cnidarian/TGR_mCG_intersect_TE.gff !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff > /Volumes/web/cnidarian/TGR_mCG_intersect_prom.gff !wc /Volumes/web/cnidarian/TGR_mCG_intersect_prom.gff !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_COMP_gene_prom_TE.bed > /Volumes/web/cnidarian/TGR_mCG_intersect_other.gff !wc /Volumes/web/cnidarian/TGR_mCG_intersect_other.gff sum(1134622+2886432+616223+5121236) !tail -n +2 /Volumes/web/cnidarian/BiGo_methratio_boop.gff > /Volumes/web/cnidarian/BiGo_methratio_boop_c.gff !head /Volumes/web/cnidarian/BiGo_methratio_boop_c.gff !wc /Volumes/web/cnidarian/BiGo_methratio_boop_c.gff !wc /Volumes/web/cnidarian/BiGo_methratio_boop.gff !intersectbed -a /Volumes/web/cnidarian/oyster.v9.glean.final.rename.CDS.gff -b /Volumes/web/cnidarian/BiGo_methratio_boop_c.gff -c > /Volumes/web/cnidarian/BiGo_CDS_v9_intersect_methratio_boop.txt !head /Volumes/web/cnidarian/BiGo_CDS_v9_intersect_methratio_boop.txt #wrong order !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_boop_c.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_exon.gff > /Volumes/web/cnidarian/BiGo_methratio_boop_intersect_CDS_b.gff !wc /Volumes/web/cnidarian/BiGo_methratio_boop_intersect_CDS_b.gff !head /Volumes/web/cnidarian/BiGo_methratio_boop_intersect_CDS_b.gff from pandas import * # read data from data file into a pandas DataFrame CDSmr = read_table("/Volumes/web/cnidarian/BiGo_methratio_boop_intersect_CDS_b.gff", # name of the data file #sep="\t", # what character separates each column? #na_values=["", " "], # what values should be considered "blank" values? header=None) CDSmr[5].hist(bins=50); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 1, 0, 150000]); plt.title('CDS'); # pandas density plot CDSmr[5].plot(kind='kde', linewidth=3); plt.title('CDS') !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_boop_c.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_1k5p_gene_promoter.gff > /Volumes/web/cnidarian/BiGo_methratio_boop_intersect_promoter.gff !head /Volumes/web/cnidarian/BiGo_methratio_boop_intersect_promoter.gff from pandas import * # read data from data file into a pandas DataFrame Promr = read_table("/Volumes/web/cnidarian/BiGo_methratio_boop_intersect_promoter.gff", # name of the data file #sep="\t", # what character separates each column? #na_values=["", " "], # what values should be considered "blank" values? header=None) Promr[5].hist(bins=50); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 1, 0, 20000]); plt.title('Promoter'); # pandas density plot Promr[5].plot(kind='kde', linewidth=3); plt.title('Promoter'); !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_boop_c.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_TE.gff > /Volumes/web/cnidarian/BiGo_methratio_boop_intersect_TE.gff !head /Volumes/web/cnidarian/BiGo_methratio_boop_intersect_TE.gff from pandas import * # read data from data file into a pandas DataFrame TEmr = read_table("/Volumes/web/cnidarian/BiGo_methratio_boop_intersect_TE.gff", # name of the data file #sep="\t", # what character separates each column? #na_values=["", " "], # what values should be considered "blank" values? header=None) TEmr[5].hist(bins=50); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 1, 0, 15000]); plt.title('Transposable Elements'); # pandas density plot TEmr[5].plot(kind='kde', linewidth=3); plt.title('Transposable Elements'); !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_boop_c.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_intron.gff > /Volumes/web/cnidarian/BiGo_methratio_boop_intersect_intron.gff from pandas import * # read data from data file into a pandas DataFrame Intronmr = read_table("/Volumes/web/cnidarian/BiGo_methratio_boop_intersect_intron.gff", # name of the data file #sep="\t", # what character separates each column? #na_values=["", " "], # what values should be considered "blank" values? header=None) Intronmr[5].hist(bins=50); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 1, 0, 150000]); plt.title('Intron'); # pandas density plot Intronmr[5].plot(kind='kde', linewidth=3); plt.title('Intron'); !intersectbed -a /Volumes/web/cnidarian/BiGo_methratio_boop_c.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_COMP_gene_prom_TE.bed > /Volumes/web/cnidarian/BiGo_methratio_boop_intersect_other.gff from pandas import * # read data from data file into a pandas DataFrame othermr = read_table("/Volumes/web/cnidarian/BiGo_methratio_boop_intersect_other.gff", # name of the data file #sep="\t", # what character separates each column? #na_values=["", " "], # what values should be considered "blank" values? header=None) othermr[5].hist(bins=50); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 1, 0, 60000]); plt.title('Other'); # pandas density plot othermr[5].plot(kind='kde', linewidth=3); plt.title('Other'); #For each gene, need to get number of CG and Number of methylated CG #methylated CG defined as 5x 50% #number of CGs per gene !intersectbed -c -a /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff > /Volumes/web/cnidarian/TJGR_gene_CGcount.txt !head /Volumes/web/cnidarian/TJGR_gene_CGcount.txt !wc /Volumes/web/cnidarian/TJGR_gene_CGcount.txt #BiGO methylaiton file !head /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff !tail /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff #number of mCGs per gene !intersectbed -c -a /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff -b /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff > /Volumes/web/cnidarian/BiGO_gene_mCGcount.txt !head /Volumes/web/cnidarian/BiGO_gene_mCGcount.txt !tail /Volumes/web/cnidarian/BiGO_gene_mCGcount.txt !wc /Volumes/web/cnidarian/BiGO_gene_mCGcount.txt #join #going for ugly excel cut and paste #sneak peek at percent methylation per gene (when CG count >/= 10) #only genes wiht >/= 10 cgs !head /Volumes/web/cnidarian/BiGo_gene_PerMeth.txt #now need to examine percent methylation versus gene expression !head /Volumes/web/cnidarian/BiGo_gene_pmeth_expression.csv #will use promoter track that does not overlap genebodies !wc /Volumes/web/cnidarian/TJGR_prom_subtract_gene1.gff #CG count per promoter !intersectbed -c -a /Volumes/web/cnidarian/TJGR_prom_subtract_gene1.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff > /Volumes/web/cnidarian/TJGR_prom_subgene_CGcount.txt !wc /Volumes/web/cnidarian/TJGR_prom_subgene_CGcount.txt #post tw/excel modding !head /Volumes/web/cnidarian/TJGR_prom_subgene_CGcount.txt #mCg per promoter !intersectbed -c -a /Volumes/web/cnidarian/TJGR_prom_subtract_gene1.gff -b /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff > /Volumes/web/cnidarian/BiGo_prom_subgene_CGcount.txt !head /Volumes/web/cnidarian/BiGo_prom_subgene_CGcount.txt #post tw/excel modding !head /Volumes/web/cnidarian/BiGo_prom_subgene_mCGcount.txt #give up into EXcel !head /Volumes/web/cnidarian/BiGo_prom_subgene_PerMeth.txt #file manipulated in excel !head /Volumes/web/cnidarian/BiGo_prom_subgene_pmeth_expression.txt Given that only "promoters" with islands are examined - an absolute methylation approach could be used #absolute methylation #using the conserved model #Window size [100]: #Minimum length of an island [200]: #Minimum observed/expected [0.6]: #Minimum percentage [50.]: 45 #promoter does not overlap gene !intersectbed -c -a /Volumes/web/cnidarian/TJGR_prom_notgene_cpgIsland1u.gff -b /Volumes/web/cnidarian/BiGo_methratio_mCG_tail.gff > /Volumes/web/cnidarian/BiGo_pro_island1u_intersect_mCpG.txt !head /Volumes/web/cnidarian/BiGo_pro_island1u_intersect_mCpG.txt !wc /Volumes/web/cnidarian/BiGo_pro_island1u_intersect_mCpG.txt !intersectbed -c -a /Volumes/web/cnidarian/TJGR_prom_notgene_cpgIsland1u.gff -b /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff > /Volumes/web/cnidarian/TJGR_pro_island1u_intersect_CpG.txt !wc /Volumes/web/cnidarian/TJGR_pro_island1u_intersect_CpG.txt #geneexprssion level !head /Volumes/web/cnidarian/BiGo_RNAseq_genes #lets plot absolute methylation versus expression #simpled up methylation file !wc /Volumes/web/cnidarian/BiGo_pro_island1u_intersect_mCpG_slm.csv