#directory on greenbird ls /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq cd Shared/Apps/bsmap-2.73/ ! /Users/Shared/Apps/bsmap-2.73/bsmap #F ! /Users/Shared/Apps/bsmap-2.73/bsmap -a /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgF_TTAGGC_L007_R1.fastq.gz -b /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgF_TTAGGC_L007_R2.fastq.gz -d /Volumes/web/cnidarian/oyster.v9.fa -o /Volumes/web/cnidarian/BiGo_lar_F.sam -p 8 #methratio on female !python /Users/Shared/Apps/bsmap-2.74/methratio.py -d /Volumes/web/cnidarian/oyster.v9.fa -u -z -g -o /Volumes/web/cnidarian/BiGo_lar_F_methratio_v9_A.txt -s /Users/Shared/Apps/bsmap-2.74/samtools /Volumes/web/cnidarian/BiGo_lar_F.sam from pandas import * # read data from data file into a pandas DataFrame F = read_table("http://eagle.fish.washington.edu/cnidarian/BiGo_lar_F_methratio_v9_A.txt", # name of the data file # sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? !head /Volumes/web/cnidarian/BiGo_lar_F_methratio_v9_A.txt !wc /Volumes/web/cnidarian/BiGo_lar_F_methratio_v9_A.txt F F['CT_count'].hist(bins=100); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 120, 0, 1000]) #TID3 ! /Users/Shared/Apps/bsmap-2.73/bsmap -a /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgLarv_T1D3_TGACCA_L007_R1.fastq.gz -b /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgLarv_T1D3_TGACCA_L007_R2.fastq.gz -d /Volumes/web/cnidarian/oyster.v9.fa -o /Volumes/web/cnidarian/BiGo_lar_T1D3.sam -p 8 #TID3 methratio !python /Users/Shared/Apps/bsmap-2.74/methratio.py -d /Volumes/web/cnidarian/oyster.v9.fa -u -z -g -o /Volumes/web/cnidarian/BiGo_lar_TID3_methratio_v9_A.txt -s /Users/Shared/Apps/bsmap-2.74/samtools /Volumes/web/cnidarian/BiGo_lar_T1D3.sam from pandas import * # read data from data file into a pandas DataFrame TID3 = read_table("http://eagle.fish.washington.edu/cnidarian/BiGo_lar_TID3_methratio_v9_A.txt", # name of the data file # sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? TID3['CT_count'].hist(bins=100); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 200, 0, 1000]) #not sure what this does #where ('CT_count') < 100 #T1D5 ! /Users/Shared/Apps/bsmap-2.73/bsmap -a /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgLarv_T1D5_ACAGTG_L007_R1.fastq.gz -b /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgLarv_T1D5_ACAGTG_L007_R2.fastq.gz -d /Volumes/web/cnidarian/oyster.v9.fa -o /Volumes/web/cnidarian/BiGo_lar_T1D5.sam -p 8 #TID5 methratio !python /Users/Shared/Apps/bsmap-2.74/methratio.py -d /Volumes/web/cnidarian/oyster.v9.fa -u -z -g -o /Volumes/web/cnidarian/BiGo_lar_TID5_methratio_v9_A.txt -s /Users/Shared/Apps/bsmap-2.74/samtools /Volumes/web/cnidarian/BiGo_lar_T1D5.sam #T3D3 ! /Users/Shared/Apps/bsmap-2.73/bsmap -a /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_Bs_CgLarve_T3D3_GCCAAT_L007_R1.fastq.gz -b /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_Bs_CgLarve_T3D3_GCCAAT_L007_R2.fastq.gz -d /Volumes/web/cnidarian/oyster.v9.fa -o /Volumes/web/cnidarian/BiGo_lar_T3D3.sam -p 8 #T3D3 methratio !python /Users/Shared/Apps/bsmap-2.74/methratio.py -d /Volumes/web/cnidarian/oyster.v9.fa -u -z -g -o /Volumes/web/cnidarian/BiGo_lar_T3D3_methratio_v9_A.txt -s /Users/Shared/Apps/bsmap-2.74/samtools /Volumes/web/cnidarian/BiGo_lar_T3D3.sam #T3D5 ! /Users/Shared/Apps/bsmap-2.73/bsmap -a /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgLarv_T3D5_CAGATC_L007_R1.fastq.gz -b /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgLarv_T3D5_CAGATC_L007_R2.fastq.gz -d /Volumes/web/cnidarian/oyster.v9.fa -o /Volumes/web/cnidarian/BiGo_lar_T3D5.sam -p 8 #T3D5 methratio !python /Users/Shared/Apps/bsmap-2.74/methratio.py -d /Volumes/web/cnidarian/oyster.v9.fa -u -z -g -o /Volumes/web/cnidarian/BiGo_lar_T3D5_methratio_v9_A.txt -s /Users/Shared/Apps/bsmap-2.74/samtools /Volumes/web/cnidarian/BiGo_lar_T3D5.sam !wc /Volumes/web/cnidarian/BiGo_lar_T3D5_methratio_v9_A.txt #M1 ! /Users/Shared/Apps/bsmap-2.73/bsmap -a /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgM1_ACTTGA_L007_R1.fastq.gz -b /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgM1_ACTTGA_L007_R2.fastq.gz -d /Volumes/web/cnidarian/oyster.v9.fa -o /Volumes/web/cnidarian/BiGo_lar_M1.sam -p 8 #M1 methratio !python /Users/Shared/Apps/bsmap-2.74/methratio.py -d /Volumes/web/cnidarian/oyster.v9.fa -u -z -g -o /Volumes/web/cnidarian/BiGo_lar_M1_methratio_v9_A.txt -s /Users/Shared/Apps/bsmap-2.74/samtools /Volumes/web/cnidarian/BiGo_lar_M1.sam #M3 ! /Users/Shared/Apps/bsmap-2.73/bsmap -a /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgM3_GATCAG_L007_R1.fastq.gz -b /Volumes/NGS\ Drive/NGS\ Raw\ Data/Cg_larvae_BSseq/filtered_BS_CgM3_GATCAG_L007_R2.fastq.gz -d /Volumes/web/cnidarian/oyster.v9.fa -o /Volumes/web/cnidarian/BiGo_lar_M3.sam -p 8 #M3 methratio !python /Users/Shared/Apps/bsmap-2.74/methratio.py -d /Volumes/web/cnidarian/oyster.v9.fa -u -z -g -o /Volumes/web/cnidarian/BiGo_lar_M3_methratio_v9_A.txt -s /Users/Shared/Apps/bsmap-2.74/samtools /Volumes/web/cnidarian/BiGo_lar_M3.sam !wc /Volumes/web/cnidarian/BiGo_lar_M3_methratio_v9_A.txt Female pairs: 180566 (56%) single a: 48357 (15%) single b: 37390 (12%) T1D3 pairs: 1639918 (49%) single a: 709577 (21%) single b: 541078 (16%) T1D5 pairs: 1142766 (42%) single a: 764422 (28%) single b: 567963 (21%) T3D3 pairs: 3130107 (54%) single a: 1140569 (20%) single b: 880396 (15%) T3D5 pairs: 2414419 (55%) single a: 819661 (19%) single b: 637662 (14%) M1 pairs: 2600662 (47%) single a: 1090658 (20%) single b: 894893 (16%) M3 pairs: 3032870 (55%) single a: 996127 (18%) single b: 789107 (14%) All methratio coverage about 1.3x !python /Users/sr320/sqlshare-pythonclient/tools/singleupload.py -h Query python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s "SELECT chr as seqname,'methratio' as source,'CpG' as feature, pos as start, pos + 1 as [end], ratio as score, strand, '.' as frame, '.' as attribute FROM [sr320@washington.edu].[BiGO_lar_M3] where context like '__CG_' and CT_Count > 5" -f tsv -o /Volumes/web/cnidarian/BiGo_lar_M3_methratio_CG.txt #2.8 million lines #!!!!!! 3447 loci 5x coverage # 401 CG #M3 methratio output has 42 million lines #!!!!!!!! 41 million lines have CT_count <5 ---- 97% # #19 M - strand #23 m + strand from pandas import * # read data from data file into a pandas DataFrame m3 = read_table("http://eagle.fish.washington.edu/cnidarian/BiGo_lar_M3_methratio_v9_A.txt", # name of the data file # sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? m3['CT_count'].hist(bins=5000); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 10, 0, 10000000]) m3['CT_count'].hist(bins=5000); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 100, 0, 2000]) #3010 CG loci have >20 coverage !head /Volumes/web/cnidarian/BiGo_lar_M3_methratio_v9_A.txt #Mac's gill sample #methratio output had 91 million lines #69 million lines have CT_count <5 --- 75% #Claire's sperm data #methratio output had 126 million lines (after ratio NA removed) vs 127/22 #22 million lines have CT_count <5 --- 17% !python /Users/sr320/sqlshare-pythonclient/tools/singleupload.py -d BiGo_lar_M3 /Volumes/web/cnidarian/BiGo_lar_M3_methratio_v9_A.txt #Creating GFF of CG with 5x !python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s "SELECT chr as seqname,'methratio' as source,'CpG' as feature, pos as start, pos + 1 as [end], ratio as score, strand, '.' as frame, '.' as attribute FROM [sr320@washington.edu].[BiGO_lar_M3] where context like '__CG_' and CT_Count >= 5" -f tsv -o /Volumes/web/cnidarian/BiGo_lar_M3_methratio_CG.txt !head /Volumes/web/cnidarian/BiGo_lar_M3_methratio_CG.txt !wc /Volumes/web/cnidarian/BiGo_lar_M3_methratio_CG.txt #scratch !python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s "SELECT chr as seqname,'methratio' as source,'CpG' as feature, pos as start, pos + 1 as [end], ratio as score, strand, '.' as frame, '.' as attribute FROM [sr320@washington.edu].[BiGO_lar_M3] where context like '__CT_' and CT_Count >= 5" -f tsv -o /Volumes/web/cnidarian/BiGo_lar_M3_methratio_CT.txt !wc /Volumes/web/cnidarian/BiGo_lar_M3_methratio_CT.txt #confirming CNs add up #CG 182k #CC 72k #CA 240k #CT 131k # - 625k #^ that would have just been positive strand. # yes add up from pandas import * # read data from data file into a pandas DataFrame m3CG = read_table("http://eagle.fish.washington.edu/cnidarian/BiGo_lar_M3_methratio_CG.txt", # name of the data file # sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? !head /Volumes/web/cnidarian/BiGo_lar_M3_methratio_CG.txt !wc /Volumes/web/cnidarian/BiGo_lar_M3_methratio_CG.txt m3CG['score'].hist(bins=500); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 1, 0, 10000]) #worried about score... from pandas import * # read data from data file into a pandas DataFrame m3c = read_csv("http://eagle.fish.washington.edu/cnidarian/clean_BiGo_lar_M3_cg5.csv", # name of the data file sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? m3c['ratio'].hist(bins=4); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. #plt.axis([0, 1, 0, 10000]) # 136751 CG loci have methylation ratio of 0 @ 5 coverage # 12,270 between 0-0.25 # 7572 0.25-.5 # 7323 0.5-0.75 # 18559 >0.75 # 46k have non 0 ratios m3c['ratio'].hist(bins=10); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. plt.axis([0, 1, 0, 20000]) plt.xlabel('Methylation(%)', fontsize=20) plt.ylabel('count', fontsize= 20) m3c['ratio'].hist(bins=100); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. #plt.axis([0, 1, 0, 20000]) plt.xlabel('Methylation(%)', fontsize=20) plt.ylabel('count', fontsize= 20) plt.title('Sperm (M3)', fontsize= 20) m3c['ratio'].hist(bins=10); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. #plt.axis([0, 1, 0, 20000]) plt.xlabel('Methylation(%)', fontsize=20) plt.ylabel('count', fontsize= 20) plt.title('Sperm (M3)', fontsize= 20) 12270 + 7572 + 7323 + 18559 45724 + 136751 #3010 CG have > 20x coverage !wc /Volumes/web/cnidarian/BiGo_lar_T3D3_methratio_v9_A.txt !python /Users/sr320/sqlshare-pythonclient/tools/singleupload.py -d BiGo_lar_T3D3 /Volumes/web/cnidarian/BiGo_lar_T3D3_methratio_v9_A.txt #45 Million lines !python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s "SELECT chr as seqname,'methratio' as source,'CpG' as feature, pos as start, pos + 1 as [end], ratio as score, strand, '.' as frame, '.' as attribute FROM [sr320@washington.edu].[BiGO_lar_T3D3] where context like '__CG_' and CT_Count >= 5" -f tsv -o /Volumes/web/cnidarian/BiGo_lar_T3D3_methratio_CG.txt !head /Volumes/web/cnidarian/BiGo_lar_T3D3_methratio_CG.txt !wc /Volumes/web/cnidarian/BiGo_lar_T3D3_methratio_CG.txt from pandas import * # read data from data file into a pandas DataFrame T3D3cg = read_table("http://eagle.fish.washington.edu/cnidarian/BiGo_lar_T3D3_methratio_CG.txt", # name of the data file #sep=",", # what character separates each column? na_values=["", " "]) # what values should be considered "blank" values? T3D3cg['score'].hist(bins=100); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. #plt.axis([0, 1, 0, 20000]) plt.xlabel('Methylation(%)', fontsize=20) plt.ylabel('count', fontsize= 20) plt.title('Larvae (T3D3)', fontsize= 20) T3D3cg['score'].hist(bins=10); #Axis limits are changed using the axis([xmin, xmax, ymin, ymax]) function. #plt.axis([0, 1, 0, 20000]) plt.xlabel('Methylation(%)', fontsize=20) plt.ylabel('count', fontsize= 20) plt.title('Larvae (T3D3)', fontsize= 20) #37 million lines in methratio file #107k CG #42k CC #129k CA #74k CT #checking out methylation !wc /Volumes/web/cnidarian/BiGo_lar_T3D5_methratio_v9_A.txt !python /Users/sr320/sqlshare-pythonclient/tools/singleupload.py -d BiGo_lar_T3D5 /Volumes/web/cnidarian/BiGo_lar_T3D5_methratio_v9_A.txt !python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s "SELECT chr as seqname,'methratio' as source,'CpG' as feature, pos as start, pos + 1 as [end], ratio as score, strand, '.' as frame, '.' as attribute FROM [sr320@washington.edu].[BiGO_lar_T3D5] where context like '__CG_' and CT_Count > 5" -f tsv -o /Volumes/web/cnidarian/BiGo_lar_T3D5_methratio_CG.txt !wc /Volumes/web/cnidarian/BiGo_lar_M1_methratio_v9_A.txt #M1 !python /Users/sr320/sqlshare-pythonclient/tools/singleupload.py -d BiGo_lar_M1 /Volumes/web/cnidarian/BiGo_lar_M1_methratio_v9_A.txt #Creating GFF of CG 5x !python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s "SELECT chr as seqname,'methratio' as source,'CpG' as feature, pos as start, pos + 1 as [end], ratio as score, strand, '.' as frame, '.' as attribute FROM [sr320@washington.edu].[BiGo_lar_M1] where context like '__CG_' and CT_Count > 5" -f tsv -o /Volumes/web/cnidarian/BiGo_lar_M1_methratio_CG.txt !wc /Volumes/web/cnidarian/BiGo_lar_TID3_methratio_v9_A.txt #Creating GFF of CG 5x !python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s "SELECT chr as seqname,'methratio' as source,'CpG' as feature, pos as start, pos + 1 as [end], ratio as score, strand, '.' as frame, '.' as attribute FROM [sr320@washington.edu].[BiGO_lar_T1D3] where context like '__CG_' and CT_Count > 5" -f tsv -o /Volumes/web/cnidarian/BiGo_lar_T1D3_methratio_CG.txt !wc /Volumes/web/cnidarian/BiGo_lar_TID5_methratio_v9_A.txt #uploading T1D5 !python /Users/sr320/sqlshare-pythonclient/tools/singleupload.py -d BiGo_lar_T1D5 /Volumes/web/cnidarian/BiGo_lar_TID5_methratio_v9_A.txt #Creating GFF of CG !python /Users/sr320/sqlshare-pythonclient/tools/fetchdata.py -s "SELECT chr as seqname,'methratio' as source,'CpG' as feature, pos as start, pos + 1 as [end], ratio as score, strand, '.' as frame, '.' as attribute FROM [sr320@washington.edu].[BiGo_lar_T1D5] where context like '__CG_' and CT_Count > 5" -f tsv -o /Volumes/web/cnidarian/BiGo_lar_T1D5_methratio_CG.txt from IPython.display import HTML HTML('')