# Preview the start of Cgigas_v9_CG.gff (head prints the first 10 lines by default).
!head /Volumes/web/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_CG.gff
##gff-version 3 ##sequence-region scaffold360 1 280 #!Date 2013-04-23 #!Type DNA #!Source-version EMBOSS 6.5.7.0 scaffold360 fuzznuc nucleotide_motif 60 61 2 + . ID=scaffold360.1;note=*pat pattern:CG scaffold360 fuzznuc nucleotide_motif 96 97 2 + . ID=scaffold360.2;note=*pat pattern:CG scaffold360 fuzznuc nucleotide_motif 120 121 2 + . ID=scaffold360.3;note=*pat pattern:CG scaffold360 fuzznuc nucleotide_motif 187 188 2 + . ID=scaffold360.4;note=*pat pattern:CG ##gff-version 3
output not gff
# Run EMBOSS fuzznuc on the Ensembl release-21 dna_sm genome FASTA, searching
# for the pattern CG (-pattern CG) and writing the report in GFF format
# (-rformat gff) to /Volumes/web/cnidarian/CGmotif_ensembl_sm.gff.
# NOTE(review): "dna_sm" in the file name presumably denotes the soft-masked
# build (repeats in lower case) - confirm against the Ensembl README.
!/Volumes/Bay3/Software/EMBOSS-6.5.7/emboss/fuzznuc -sequence /Volumes/web/whale/ensembl/ftp.ensemblgenomes.org/pub/release-21/metazoa/fasta/crassostrea_gigas/dna/Crassostrea_gigas.GCA_000297895.1.21.dna_sm.genome.fa -rformat gff -pattern CG -outfile /Volumes/web/cnidarian/CGmotif_ensembl_sm.gff
Search for patterns in nucleotide sequences
# Count the lines of the genome FASTA that contain the upper-case string "CG".
# NOTE: fgrep -c reports the number of matching LINES, not the number of
# occurrences, and the match is case-sensitive. A line with several CGs counts
# once, and a CG split across a line wrap is not seen at all, so this figure is
# a rough sanity check only and is not comparable to a count of motif features.
!fgrep -c "CG" /Volumes/web/whale/ensembl/ftp.ensemblgenomes.org/pub/release-21/metazoa/fasta/crassostrea_gigas/dna/Crassostrea_gigas.GCA_000297895.1.21.dna_sm.genome.fa
4910271
# Count the lines of the genome FASTA that contain the lower-case string "cg".
# NOTE: as with any fgrep -c, this is a count of matching LINES, not of
# occurrences, and it is case-sensitive: mixed-case pairs ("Cg", "cG") are
# matched neither here nor by an upper-case "CG" search.
# NOTE(review): lower-case bases presumably come from soft-masking in the
# dna_sm file - confirm.
!fgrep -c "cg" /Volumes/web/whale/ensembl/ftp.ensemblgenomes.org/pub/release-21/metazoa/fasta/crassostrea_gigas/dna/Crassostrea_gigas.GCA_000297895.1.21.dna_sm.genome.fa
223309
# Inspect the first 20 lines of the fuzznuc GFF report to check its layout.
!head -20 /Volumes/web/cnidarian/CGmotif_ensembl_sm.gff
##gff-version 3 ##sequence-region C12728 1 200 #!Date 2014-04-14 #!Type DNA #!Source-version EMBOSS 6.5.7.0 C12728 fuzznuc nucleotide_motif 40 41 2 + . ID=C12728.1;note=*pat pattern:CG C12728 fuzznuc nucleotide_motif 62 63 2 + . ID=C12728.2;note=*pat pattern:CG C12728 fuzznuc nucleotide_motif 164 165 2 + . ID=C12728.3;note=*pat pattern:CG C12728 fuzznuc nucleotide_motif 193 194 2 + . ID=C12728.4;note=*pat pattern:CG ##gff-version 3 ##sequence-region C12722 1 200 #!Date 2014-04-14 #!Type DNA #!Source-version EMBOSS 6.5.7.0 C12722 fuzznuc nucleotide_motif 104 105 2 + . ID=C12722.1;note=*pat pattern:CG C12722 fuzznuc nucleotide_motif 134 135 2 + . ID=C12722.2;note=*pat pattern:CG C12722 fuzznuc nucleotide_motif 154 155 2 + . ID=C12722.3;note=*pat pattern:CG ##gff-version 3 ##sequence-region C12706 1 200 #!Date 2014-04-14
# Count the lines of the fuzznuc report that contain the source/type fields
# "fuzznuc nucleotide_motif". In the head output each reported CG hit is one
# such line, while "##"/"#!" header lines do not contain the string, so this
# should equal the number of CG features written.
!fgrep -c "fuzznuc nucleotide_motif" /Volumes/web/cnidarian/CGmotif_ensembl_sm.gff
9936143
# Inspect the first 100 lines of the Ensembl release-21 GFF3 annotation that
# will be used as the feature set for the intersection.
!head -100 /Volumes/web/whale/ensembl/ftp.ensemblgenomes.org/pub/release-21/metazoa/gff3/crassostrea_gigas/Crassostrea_gigas.GCA_000297895.1.21.gff3
##gff-version 3 ##sequence-region C12728 1 200 C12728 . repeat_region 38 81 . ? . Name=trf;class=trf;repeat_consensus=GCCGCTTTTATGTCTCTTTCTG;type=Tandem repeats ##sequence-region C12722 1 200 ##sequence-region C12706 1 200 ##sequence-region C12718 1 200 ##sequence-region C12732 1 200 ##sequence-region C12726 1 200 ##sequence-region C12708 1 200 ##sequence-region C12730 1 200 ##sequence-region C12724 1 200 C12724 . repeat_region 1 96 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust C12724 . repeat_region 20 127 . ? . Name=trf;class=trf;repeat_consensus=TCATCAACATGAACATCATCATCGTCG;type=Tandem repeats C12724 . repeat_region 47 103 . ? . Name=trf;class=trf;repeat_consensus=TCGTCATCATCAACATAACATCA;type=Tandem repeats ##sequence-region C12734 1 200 C12734 . repeat_region 149 196 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust ##sequence-region C12782 1 201 ##sequence-region C12766 1 201 ##sequence-region C12768 1 201 ##sequence-region C12738 1 201 ##sequence-region C12758 1 201 ##sequence-region C12742 1 201 ##sequence-region C12774 1 201 ##sequence-region C12760 1 201 ##sequence-region C12772 1 201 ##sequence-region C12764 1 201 ##sequence-region C12780 1 201 C12780 . repeat_region 158 165 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust ##sequence-region C12778 1 201 ##sequence-region C12784 1 201 ##sequence-region C12748 1 201 ##sequence-region C12802 1 202 C12802 . repeat_region 52 59 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust ##sequence-region C12806 1 202 C12806 . repeat_region 149 157 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust ##sequence-region C12786 1 202 ##sequence-region C12788 1 202 ##sequence-region C12810 1 202 ##sequence-region C12828 1 202 C12828 . repeat_region 1 202 . ? . Name=trf;class=trf;repeat_consensus=ATGTAGCAGTAGGTAATTCAAGCA;type=Tandem repeats C12828 . repeat_region 1 202 . ? . 
Name=trf;class=trf;repeat_consensus=ATGTAGCAGTCGGTAATTCAAGCAATGTAGCAGTCGGTAATTCAAGCTATGTAGCAGTAGGTAATTCAAGCA;type=Tandem repeats ##sequence-region C12822 1 202 ##sequence-region C12824 1 202 ##sequence-region C12814 1 202 ##sequence-region C12830 1 202 C12830 . repeat_region 33 88 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust ##sequence-region C12798 1 202 ##sequence-region C12836 1 203 C12836 . repeat_region 72 79 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust ##sequence-region C12832 1 203 ##sequence-region C12856 1 203 ##sequence-region C12866 1 203 ##sequence-region C12840 1 203 ##sequence-region C12838 1 203 C12838 . repeat_region 12 63 . ? . Name=trf;class=trf;repeat_consensus=TGTC;type=Tandem repeats C12838 . repeat_region 12 64 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust C12838 . repeat_region 17 66 . ? . Name=trf;class=trf;repeat_consensus=GTCTGTCCGTCC;type=Tandem repeats ##sequence-region C12848 1 203 ##sequence-region C12834 1 203 ##sequence-region C12860 1 203 ##sequence-region C12842 1 203 ##sequence-region C12882 1 204 ##sequence-region C12890 1 204 ##sequence-region C12872 1 204 ##sequence-region C12886 1 204 C12886 . repeat_region 14 41 . ? . Name=trf;class=trf;repeat_consensus=AAGGC;type=Tandem repeats C12886 . repeat_region 102 156 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust ##sequence-region C12880 1 204 ##sequence-region C12906 1 204 ##sequence-region C12876 1 204 ##sequence-region C12870 1 204 ##sequence-region C12888 1 204 ##sequence-region C12948 1 205 C12948 . repeat_region 117 203 . ? . Name=trf;class=trf;repeat_consensus=TGAATTGAACTTTTAGGATA;type=Tandem repeats ##sequence-region C12964 1 205 ##sequence-region C12960 1 205 C12960 . repeat_region 188 196 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust ##sequence-region C12916 1 205 ##sequence-region C12924 1 205 C12924 . repeat_region 5 171 . ? . 
Name=trf;class=trf;repeat_consensus=TGAATTCGTTATAACGAATTTAAATCTGTTATAACAAATTGA;type=Tandem repeats C12924 . repeat_region 12 126 . ? . Name=trf;class=trf;repeat_consensus=GTTATAACGAATTTAAATC;type=Tandem repeats ##sequence-region C12932 1 205 C12932 . repeat_region 23 72 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust C12932 . repeat_region 23 72 . ? . Name=trf;class=trf;repeat_consensus=TGTCCGTC;type=Tandem repeats C12932 . repeat_region 24 69 . ? . Name=trf;class=trf;repeat_consensus=GTCC;type=Tandem repeats ##sequence-region C12958 1 205 ##sequence-region C12918 1 205 ##sequence-region C12950 1 205 C12950 . repeat_region 2 205 . ? . Name=trf;class=trf;repeat_consensus=CACTATTGATGATGTCGTTGGGCTATTGGCACTGGT;type=Tandem repeats ##sequence-region C12956 1 205 ##sequence-region C12938 1 205 ##sequence-region C12954 1 205 C12954 . repeat_region 52 96 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust ##sequence-region C12928 1 205 ##sequence-region C12942 1 205 ##sequence-region C12912 1 205 ##sequence-region C12934 1 205 ##sequence-region C12930 1 205 ##sequence-region C12994 1 206 C12994 . repeat_region 187 203 . ? . Name=dust;class=dust;repeat_consensus=N;type=Dust
# Left outer join (-loj) of the CG motif features (-a) against the Ensembl
# GFF3 features (-b); stdout is redirected to CGmotif_intersect_feature.
# With -loj, bedtools writes one line per A/B overlap, and one line with a
# null B record for every A feature that overlaps nothing. A motif that falls
# in several annotated features therefore appears several times, and the
# output can have more lines than the -a file.
!/Volumes/Bay3/Software/bedtools-2.17.0/bin/intersectBed -loj -a /Volumes/web/cnidarian/CGmotif_ensembl_sm.gff -b /Volumes/web/whale/ensembl/ftp.ensemblgenomes.org/pub/release-21/metazoa/gff3/crassostrea_gigas/Crassostrea_gigas.GCA_000297895.1.21.gff3 > /Volumes/web/cnidarian/CGmotif_intersect_feature
# Count the lines of the intersect output that carry a motif record. This is a
# count of joined rows (motif x overlapping feature, plus unmatched motifs),
# not of distinct CG motifs.
!fgrep -c "fuzznuc nucleotide_motif" /Volumes/web/cnidarian/CGmotif_intersect_feature
16296813
# Upload the intersect output to SQLShare.
# spd is the directory holding the sqlshare-pythonclient tools; IPython expands
# {spd} inside the shell-escape line below.
spd="/Users/sr320/sqlshare-pythonclient/tools/"
# NOTE(review): -d appears to set the dataset name (the upload log ends with
# "finished CGannot_Ensembl_feat") - confirm against singleupload.py's usage.
!python {spd}singleupload.py -d CGannot_Ensembl_feat /Volumes/web/cnidarian/CGmotif_intersect_feature
processing chunk line 0 to 657108 (2.6846268177 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 69C7611D... processing chunk line 657108 to 1206969 (384.398473978 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 9429AC87... processing chunk line 1206969 to 1752759 (616.887005806 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 959131F6... processing chunk line 1752759 to 2282289 (925.492045879 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 2811835F... processing chunk line 2282289 to 2808921 (1166.18363285 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing A9FF06D1... processing chunk line 2808921 to 3332483 (1400.51269984 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 6CBD2B91... processing chunk line 3332483 to 3846094 (1698.62651396 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 684CD4BD... processing chunk line 3846094 to 4359769 (1941.41357589 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 2344F22F... processing chunk line 4359769 to 4873172 (2161.7397759 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 00BF8256... processing chunk line 4873172 to 5386517 (2395.16768193 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 9ED64889... processing chunk line 5386517 to 5905639 (2630.70829082 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 3ACE407C... processing chunk line 5905639 to 6426533 (2879.14030886 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 4D377FD1... processing chunk line 6426533 to 6937019 (3131.77666283 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 9DB40C28... 
processing chunk line 6937019 to 7457512 (3338.09166694 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 6C41EF61... processing chunk line 7457512 to 7967905 (3626.65325093 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing A0E8B2B7... processing chunk line 7967905 to 8493500 (4040.02613378 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 92AB3679... processing chunk line 8493500 to 9016541 (4476.16360092 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 2B629C49... processing chunk line 9016541 to 9532320 (4790.02325392 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 2AC4878F... processing chunk line 9532320 to 10043405 (5222.42379093 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing F785CD3A... processing chunk line 10043405 to 10568622 (5525.34709191 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 6F6628E5... processing chunk line 10568622 to 11080379 (6003.6101079 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 2038A1DD... processing chunk line 11080379 to 11595405 (6215.55741787 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 836D66A7... processing chunk line 11595405 to 12117200 (6482.20461988 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 00D1A75F... processing chunk line 12117200 to 12629085 (6827.36811996 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 6899B947... processing chunk line 12629085 to 13141826 (7120.6570859 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 9D65513F... processing chunk line 13141826 to 13655858 (7414.73094296 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing A8E9813F... 
processing chunk line 13655858 to 14175744 (7624.57059598 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 33816E0C... processing chunk line 14175744 to 14683173 (7843.50774193 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing C2A52351... processing chunk line 14683173 to 15203211 (8131.42966795 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing C319D786... processing chunk line 15203211 to 15710224 (8490.53375196 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 2DA15F06... processing chunk line 15710224 to 16224928 (8869.11250401 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing 4EE0EF6F... processing chunk line 16224928 to 16296813 (9075.72935987 s elapsed) pushing /Volumes/web/cnidarian/CGmotif_intersect_feature... parsing CE29D444... finished CGannot_Ensembl_feat