!java -jar samifier.jar -r /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid -m /Volumes/web/oyster/bioinformatics/103B_251_02_mappingfile.txt -g /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff -c /Volumes/web/oyster/bioinformatics/Crassostrea_gigas.GCA_000297895.1.21.dna.genome.fa.fa -o /Users/emmatimminsschiffman/Documents/winter_2014/Bioinformatics/Cg_samifier_out20140220 -l /Users/emmatimminsschiffman/Documents/winter_2014/Bioinformatics/log_20140220 -b /Users/emmatimminsschiffman/Documents/winter_2014/Cg_20140220.bed
!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar -r
org.apache.commons.cli.MissingOptionException: Missing required options: r, m, g, c, o Version = 1.0.9 usage: samifier [-b <bedFile>] -c <chromosomeDir> -g <genomeFile> [-l <logFile>] -m <mappingFile> -o <outputFile> -r <searchResultsFile> [-s <Confidence Score thresold>] -b <bedFile> Filename to write IGV regions of interest (BED) file to -c <chromosomeDir> Directory containing the chromosome files in FASTA format for the given genome -g <genomeFile> Genome file in gff format -l <logFile> Filename to write the log into -m <mappingFile> File mapping protein identifier to ordered locus name -o <outputFile> Filename to write the SAM format file to -r <searchResultsFile> Mascot search results file in txt format -s <Confidence Score thresold> Minimum confidence score for peptides to be included
# -r <searchResultsFile> Mascot search results file in txt format
!head -50 /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid
<?xml version="1.0" encoding="ISO-8859-1"?> <MzIdentML id="" creationDate="2012-09-20T03:13:18" version="1.1.0" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://psidev.info/files/mzIdentML1.1.0.xsd" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <cvList> <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="3.53.0" uri="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/> <cv id="UNIMOD" fullName="UNIMOD" version="2013-05-31" uri="http://www.unimod.org/obo/unimod.obo"/> <cv id="UO" fullName="Unit Ontology" version="12:10:2011" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/> </cvList> <AnalysisSoftwareList> <AnalysisSoftware id="AS_peptideprophet_" name="peptideprophet"> <SoftwareName><cvParam cvRef="MS" accession="MS:1001456" name="analysis software" value="peptideprophet"/></SoftwareName> </AnalysisSoftware> <AnalysisSoftware id="AS_database_refresh_" name="database_refresh"> <SoftwareName><cvParam cvRef="MS" accession="MS:1001456" name="analysis software" value="database_refresh"/></SoftwareName> </AnalysisSoftware> <AnalysisSoftware id="AS_interact_" name="interact"> <SoftwareName><cvParam cvRef="MS" accession="MS:1001456" name="analysis software" value="interact"/></SoftwareName> </AnalysisSoftware> <AnalysisSoftware id="AS_Sequest" name="Sequest"> <SoftwareName><cvParam cvRef="MS" accession="MS:1001208" name="SEQUEST" value=""/></SoftwareName> </AnalysisSoftware> <AnalysisSoftware id="pwiz_3.0.5519" name="ProteoWizard MzIdentML" version="3.0.5519"> <ContactRole contact_ref="ORG_PWIZ"> <Role><cvParam cvRef="MS" accession="MS:1001267" name="software vendor" value=""/></Role> </ContactRole> <SoftwareName><cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard software" value=""/></SoftwareName> </AnalysisSoftware> </AnalysisSoftwareList> <AuditCollection> 
<Organization id="ORG_PWIZ" name="ProteoWizard"> <cvParam cvRef="MS" accession="MS:1000589" name="contact email" value="support@proteowizard.org"/> </Organization> </AuditCollection> <SequenceCollection> <DBSequence id="DBSeq_CGI_10027476" accession="CGI_10027476" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10022468" accession="CGI_10022468" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008057" accession="CGI_10008057" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008062" accession="CGI_10008062" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008068" accession="CGI_10008068" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008073" accession="CGI_10008073" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008078" accession="CGI_10008078" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008083" accession="CGI_10008083" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10011811" accession="CGI_10011811" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10028133" accession="CGI_10028133" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10014966" accession="CGI_10014966" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10024399" accession="CGI_10024399" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10020326" accession="CGI_10020326" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10024572" accession="CGI_10024572" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10024579" accession="CGI_10024579" 
searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10021674" accession="CGI_10021674" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10003490" accession="CGI_10003490" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/>
!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar -r /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid -m /Volumes/web/oyster/bioinformatics/103B_251_02_mappingfile.txt -g /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff -c /Volumes/web/oyster/bioinformatics/Crassostrea_gigas.GCA_000297895.1.21.dna.genome.fa.fa -o /Users/sr320/Desktop/Cg_samifier_out20140220 -l /Users/sr320/Desktop/log_20140220 -b /Users/sr320/Desktop/Cg_20140220.bed
Version = 1.0.9 au.org.intersect.samifier.parser.GenomeFileParsingException: Error in /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff:23 C19392 GLEAN CDS 452 610 . + 0 Parent=CGI_10000015; > Stop of sequence in gene CGI_10000015 overflows gene usage: samifier [-b <bedFile>] -c <chromosomeDir> -g <genomeFile> [-l <logFile>] -m <mappingFile> -o <outputFile> -r <searchResultsFile> [-s <Confidence Score thresold>] -b <bedFile> Filename to write IGV regions of interest (BED) file to -c <chromosomeDir> Directory containing the chromosome files in FASTA format for the given genome -g <genomeFile> Genome file in gff format -l <logFile> Filename to write the log into -m <mappingFile> File mapping protein identifier to ordered locus name -o <outputFile> Filename to write the SAM format file to -r <searchResultsFile> Mascot search results file in txt format -s <Confidence Score thresold> Minimum confidence score for peptides to be included au.org.intersect.samifier.parser.GenomeFileParsingException: Error in /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff:23 C19392 GLEAN CDS 452 610 . + 0 Parent=CGI_10000015; > Stop of sequence in gene CGI_10000015 overflows gene at au.org.intersect.samifier.parser.GenomeParserImpl.throwParsingException(GenomeParserImpl.java:98) at au.org.intersect.samifier.parser.GenomeParserImpl.processSequence(GenomeParserImpl.java:181) at au.org.intersect.samifier.parser.GenomeParserImpl.doParsing(GenomeParserImpl.java:84) at au.org.intersect.samifier.parser.GenomeParserImpl.parseGenomeFile(GenomeParserImpl.java:46) at au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:84) at au.org.intersect.samifier.Samifier.main(Samifier.java:125)
!head -50 /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff
C16582 GLEAN mRNA 35 385 0.555898 - . ID=CGI_10000001; C16582 GLEAN CDS 35 385 . - 0 Parent=CGI_10000001; C17212 GLEAN mRNA 31 363 0.999572 + . ID=CGI_10000002; C17212 GLEAN CDS 31 363 . + 0 Parent=CGI_10000002; C17316 GLEAN mRNA 30 257 0.555898 + . ID=CGI_10000003; C17316 GLEAN CDS 30 257 . + 0 Parent=CGI_10000003; C17998 GLEAN mRNA 196 387 1 - . ID=CGI_10000005; C17998 GLEAN CDS 196 387 . - 0 Parent=CGI_10000005; C18346 GLEAN mRNA 174 551 1 + . ID=CGI_10000009; C18346 GLEAN CDS 174 551 . + 0 Parent=CGI_10000009; C18428 GLEAN mRNA 286 546 0.555898 - . ID=CGI_10000010; C18428 GLEAN CDS 286 546 . - 0 Parent=CGI_10000010; C18964 GLEAN mRNA 203 658 0.999572 - . ID=CGI_10000011; C18964 GLEAN CDS 203 658 . - 0 Parent=CGI_10000011; C18980 GLEAN mRNA 30 674 0.555898 + . ID=CGI_10000012; C18980 GLEAN CDS 30 674 . + 0 Parent=CGI_10000012; C19100 GLEAN mRNA 160 681 0.999955 - . ID=CGI_10000013; C19100 GLEAN CDS 160 681 . - 0 Parent=CGI_10000013; C19356 GLEAN mRNA 355 597 1 + . ID=CGI_10000014; C19356 GLEAN CDS 355 597 . + 0 Parent=CGI_10000014; C19392 GLEAN mRNA 46 610 1 + . ID=CGI_10000015; C19392 GLEAN CDS 46 183 . + 0 Parent=CGI_10000015; C19392 GLEAN CDS 452 610 . + 0 Parent=CGI_10000015; C19510 GLEAN mRNA 451 702 1 + . ID=CGI_10000016; C19510 GLEAN CDS 451 702 . + 0 Parent=CGI_10000016; C19532 GLEAN mRNA 155 601 0.575455 - . ID=CGI_10000017; C19532 GLEAN CDS 155 601 . - 0 Parent=CGI_10000017; C19570 GLEAN mRNA 208 411 0.555898 - . ID=CGI_10000018; C19570 GLEAN CDS 208 411 . - 0 Parent=CGI_10000018; C19626 GLEAN mRNA 347 814 0.999572 - . ID=CGI_10000019; C19626 GLEAN CDS 347 814 . - 0 Parent=CGI_10000019; C19672 GLEAN mRNA 132 464 1 + . ID=CGI_10000021; C19672 GLEAN CDS 132 464 . + 0 Parent=CGI_10000021; C20188 GLEAN mRNA 437 967 0.999572 - . ID=CGI_10000024; C20188 GLEAN CDS 437 967 . - 0 Parent=CGI_10000024; C20262 GLEAN mRNA 222 1005 1 - . ID=CGI_10000025; C20262 GLEAN CDS 872 1005 . - 0 Parent=CGI_10000025; C20262 GLEAN CDS 642 649 . 
- 1 Parent=CGI_10000025; C20262 GLEAN CDS 222 538 . - 2 Parent=CGI_10000025; C20282 GLEAN mRNA 330 980 1 + . ID=CGI_10000026; C20282 GLEAN CDS 330 980 . + 0 Parent=CGI_10000026; scaffold1224 GLEAN mRNA 107 775 0.639435 - . ID=CGI_10000027; scaffold1224 GLEAN CDS 107 775 . - 0 Parent=CGI_10000027; C20334 GLEAN mRNA 273 1027 1 - . ID=CGI_10000028; C20334 GLEAN CDS 868 1027 . - 0 Parent=CGI_10000028; C20334 GLEAN CDS 273 523 . - 2 Parent=CGI_10000028; C20412 GLEAN mRNA 73 776 1 - . ID=CGI_10000029; C20412 GLEAN CDS 706 776 . - 0 Parent=CGI_10000029; C20412 GLEAN CDS 410 463 . - 1 Parent=CGI_10000029; C20412 GLEAN CDS 73 214 . - 1 Parent=CGI_10000029;
!sed 's/CDS/exon/g' </Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff> /Volumes/web/cnidarian/ets_v9.gff
!head /Volumes/web/cnidarian/ets_v9.gff
C16582 GLEAN mRNA 35 385 0.555898 - . ID=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 Parent=CGI_10000001; C17212 GLEAN mRNA 31 363 0.999572 + . ID=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 Parent=CGI_10000002; C17316 GLEAN mRNA 30 257 0.555898 + . ID=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 Parent=CGI_10000003; C17998 GLEAN mRNA 196 387 1 - . ID=CGI_10000005; C17998 GLEAN exon 196 387 . - 0 Parent=CGI_10000005; C18346 GLEAN mRNA 174 551 1 + . ID=CGI_10000009; C18346 GLEAN exon 174 551 . + 0 Parent=CGI_10000009;
!sed 's/mRNA/CDS/g' </Volumes/web/cnidarian/ets_v9.gff> /Volumes/web/cnidarian/ets_v9_b.gff
!head /Volumes/web/cnidarian/ets_v9_b.gff
C16582 GLEAN CDS 35 385 0.555898 - . ID=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 Parent=CGI_10000001; C17212 GLEAN CDS 31 363 0.999572 + . ID=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 Parent=CGI_10000002; C17316 GLEAN CDS 30 257 0.555898 + . ID=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 Parent=CGI_10000003; C17998 GLEAN CDS 196 387 1 - . ID=CGI_10000005; C17998 GLEAN exon 196 387 . - 0 Parent=CGI_10000005; C18346 GLEAN CDS 174 551 1 + . ID=CGI_10000009; C18346 GLEAN exon 174 551 . + 0 Parent=CGI_10000009;
!sed 's/ID=/PGUUID=/g' </Volumes/web/cnidarian/ets_v9_b.gff> /Volumes/web/cnidarian/ets_v9_c.gff
!head /Volumes/web/cnidarian/ets_v9_c.gff
C16582 GLEAN CDS 35 385 0.555898 - . PGUUID=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 Parent=CGI_10000001; C17212 GLEAN CDS 31 363 0.999572 + . PGUUID=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 Parent=CGI_10000002; C17316 GLEAN CDS 30 257 0.555898 + . PGUUID=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 Parent=CGI_10000003; C17998 GLEAN CDS 196 387 1 - . PGUUID=CGI_10000005; C17998 GLEAN exon 196 387 . - 0 Parent=CGI_10000005; C18346 GLEAN CDS 174 551 1 + . PGUUID=CGI_10000009; C18346 GLEAN exon 174 551 . + 0 Parent=CGI_10000009;
!sed 's/Parent=/ID=/g' </Volumes/web/cnidarian/ets_v9_c.gff> /Volumes/web/cnidarian/ets_v9_d.gff
!head /Volumes/web/cnidarian/ets_v9_d.gff
C16582 GLEAN CDS 35 385 0.555898 - . PGUUID=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 ID=CGI_10000001; C17212 GLEAN CDS 31 363 0.999572 + . PGUUID=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 ID=CGI_10000002; C17316 GLEAN CDS 30 257 0.555898 + . PGUUID=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 ID=CGI_10000003; C17998 GLEAN CDS 196 387 1 - . PGUUID=CGI_10000005; C17998 GLEAN exon 196 387 . - 0 ID=CGI_10000005; C18346 GLEAN CDS 174 551 1 + . PGUUID=CGI_10000009; C18346 GLEAN exon 174 551 . + 0 ID=CGI_10000009;
!sed 's/PGUUID=/Parent=/g' </Volumes/web/cnidarian/ets_v9_d.gff> /Volumes/web/cnidarian/ets_v9_f.gff
!head /Volumes/web/cnidarian/ets_v9_f.gff
C16582 GLEAN CDS 35 385 0.555898 - . Parent=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 ID=CGI_10000001; C17212 GLEAN CDS 31 363 0.999572 + . Parent=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 ID=CGI_10000002; C17316 GLEAN CDS 30 257 0.555898 + . Parent=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 ID=CGI_10000003; C17998 GLEAN CDS 196 387 1 - . Parent=CGI_10000005; C17998 GLEAN exon 196 387 . - 0 ID=CGI_10000005; C18346 GLEAN CDS 174 551 1 + . Parent=CGI_10000009; C18346 GLEAN exon 174 551 . + 0 ID=CGI_10000009;
!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar -r /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid -m /Volumes/web/oyster/bioinformatics/103B_251_02_mappingfile.txt -g /Volumes/web/cnidarian/ets_v9_f.gff -c /Volumes/web/oyster/bioinformatics/Crassostrea_gigas.GCA_000297895.1.21.dna.genome.fa.fa -o /Users/sr320/Desktop/Cg_samifier_out20140220 -l /Users/sr320/Desktop/log_20140220 -b /Users/sr320/Desktop/Cg_20140220.bed
Start document End document! Run exception thrown java.lang.NumberFormatException: null at java.lang.Integer.parseInt(Integer.java:454) at java.lang.Integer.parseInt(Integer.java:527) at au.org.intersect.samifier.parser.mzidentml.MzidReader.build(MzidReader.java:113) at au.org.intersect.samifier.parser.mzidentml.MzidReader.processEvidence(MzidReader.java:137) at au.org.intersect.samifier.parser.mzidentml.MzidReader.run(MzidReader.java:54) at au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseMascotPeptideSearchResultsMzidentMLFormat(PeptideSearchResultsParserImpl.java:129) at au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:79) at au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:56) at au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:90) at au.org.intersect.samifier.Samifier.main(Samifier.java:125) Version = 1.0.9 java.lang.NullPointerException usage: samifier [-b <bedFile>] -c <chromosomeDir> -g <genomeFile> [-l <logFile>] -m <mappingFile> -o <outputFile> -r <searchResultsFile> [-s <Confidence Score thresold>] -b <bedFile> Filename to write IGV regions of interest (BED) file to -c <chromosomeDir> Directory containing the chromosome files in FASTA format for the given genome -g <genomeFile> Genome file in gff format -l <logFile> Filename to write the log into -m <mappingFile> File mapping protein identifier to ordered locus name -o <outputFile> Filename to write the SAM format file to -r <searchResultsFile> Mascot search results file in txt format -s <Confidence Score thresold> Minimum confidence score for peptides to be included java.lang.NullPointerException at java.util.ArrayList.addAll(ArrayList.java:559) at au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:56) at 
au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:90) at au.org.intersect.samifier.Samifier.main(Samifier.java:125)
!head /Volumes/web/cnidarian/oyster.v9.fa
>scaffold360 TATTCATATATCATTGAGAATGACAGTTTAAAACAGGATTTTTAATTGTGTTTAATAGGCGACCAACATTTAAGTGTCATTTGTTGAGTTATAAGCGAGTTAAAAAGCTTGTAGTTCTTCGCTATGGAAACAAAACTTTACATTTTGTTTACATTTCAGTTTTAGACCTCAAATGAATGTTTTTATCGTTAGGAACTGTTTATTTATGCTTTAAATTGATAAGAAGATTGACAAATCAGCTTGAAAAAGATTTTTTACTGGTGTATTGAACCCAGATGTA >scaffold18356 TTAAACTGGTACAACAGTATATCCAAGAGAAAAAGGACTTTATAAACAATTTAGAACAACATTGTGGGTCGGATCGGAATTCAAGAACACAGGATCCATTGATGGCTTCTACTATGAAGGGCGATACTACCGCTTGGGGTGACGTCAAATACACTCCGTCATCGACAAAATCTCCTGGATTGTATCCATTACAGGAATTGAAGAAATAACTTTCAATGTAAACAATTTACAAAGATATACATATCTTACGTGAATATCTGGCATGGAAACAACACCTACATGATAGAATAAGACAATATTTCCTACGATACATCCATGGGACATGAAGAAAGTGAAACAGTGTATATCCATATTTAAATTATTATTATAACCGTGTAATCCAAAATGAATTTTATTCCTTCTCATCAGTAAGTACACTGAAAATTGACTACTAAGAAGAGGAAAACACAGAAACGCTAGTTACAACCGATGACCGTTATGAACATAATGCGATTATTTCAGTGAACCGTGTATTAGTGATTTCCTATGTAGGAAGGATTAATATAATTTGCTTCCTATTAGTGTAAT >scaffold20428 AGAAATTATCCAGGGGTATATTCAGTAATTAAATTTGAAAAAAAAATTGGTTTCAACATATTTTATTGAATAAGGAAAATAGTTTGTATAACTTTAGTTACAAGTTCCTCTTTCTCAATTACTAGAAACTGAATACAACATGATAGAAGAAAATTAGATTTACAGGAATCAATTTTAATTGAGACACACTTGCTATATTGGGGTAGTGACACTTTCTTATAACACAAACTGCTTTTATTTTTTCAGCAGTTCAAAACTTTATTTAAGAGTCTGGAAGATTTTTAAATAATTAATTTTCTTTTTGTTATGGCTATTCTTACACCGTATATCACTCTATTTACCATACAATCACATTTTGTTTTGTTAATGCATGTTTAGGAATATAATTACTATAAAGTCAATATCTACAAATATAATGCATTATCACTATGGCAGTTGTATATATTATACATATTATAATCAAGAGAGAAACATGTAAGTTAAAAGAACTTGTTACCCACCCTCTTGTCCAATAACAATAAAATATGTTCAAATCAAATCAAACAAGAATTGATATACATGTACTAGTATATGGAATTTTTGTTCCTTTTGATGATGTATAGTGAAGAGTTTTTTAACCGCGTCACCTTTCGAATTGAACGTGAAAACCAGCAGGTTGTAAATTTCCTCTACACCATTCTTTTATATTTTTTAATTTTCAAATTTATTGTTGTGATGTTGGCTAAGAATCCTCATTTCAATCCCTTAATTATAATGGTACATGTAAATAAAATGGCGAGTGTCTATCTATGACGTACAAAGGAAGAAGCACTTTCCGGTGACGTAGTTATCACAATGCCCATACAGTGCTTGTCGATTTTATCAACTTTGATAAATAGGTTAAGTTGTATGTATACTGTATAGAATTCATGGATCCAAGGTCATGTAATAATCAACGCAAATTCCATATTGTGTTGAACATCAATTTAGTTTGTGTTATAAATTTGTAAAAGTTTATACATTCATACAAGCACCTGCTGTA >scaffold18720 
TCATGCCGATGATTTATCATACAAACATACGAGAGAAGTATTTAATAAAGCTGCATTTAGGATAATTATATCATTTTGCCATGCCATGACATATTCTGATCATCTCACATCATTGATAAAAAATTGTGCCTTTAATTCCTACCCTAAAATTAAATATTGAATATTATCATAATATTAAATTCAGTTACCTTAGATGTTCCTTTACTTGAGCCGATAAACTGCATGGTTCTATTCATCTTCACAAATAATATAGGAGACAGCCATGTCACCGAGTTTGAACGTCAGCAGGAGGATGTCTGTACAAAAAAATTGCCTTTAATTTGCTTTCAAATTTTCATTTTAAAGTATTGAGAACATTATTGTATAATAATCACCCATATATAATAAAAATCTGTAAAAGAACATCTGCATACTTACATTATTCCTACAGGCAGCCATGATGATAGCATAAGATCAAGTTACTCTGATGGATTTGTCAAAAATGACATCACAGTTCTTTTGGCGGTAATGTTTCATTCACGAGTGCAGAAAAGTTACTATGCATTGATACTGCATTTCATAAATAGTGTAGACAAGTAAAATGCTTCATGATGGACACACATAATATAGTCATGTACATTTAAAAAAA >scaffold23246 GAATTGATTTGCCACAATTTTTTGTGTGGTCAATCTAAACTAGGTTCCTCAGCTCTGTTTCTAAATTAGCATTCTGTCCTTTTTCAGAATAAAATTTTGCACAACTAACCAGGATCTTGTTAACAATTTTGAGTTAAGTCTAAATATAAATTCAAAAAATCATTTAGAGTTATTAGTAACATGATATACTAGTGTAAGTCATTTTTTGAGAATTCCCAAATGTTCTTTCTGTTGAGTGAAGATACATGTAGGTCACGAATCTGGCACGTTTCCATTTTTTGTAAACTTGTAAAATAAAACATTTTAAAAATTTTTAAAGTAAAAAGGTGGCATTTACAAAATCTGTTTTTAGTTATAAAGAACAATCATATTTAAAATATATAAAAATGCTGTCTTTGGACTCCGAATAATATTCCAAATATATTGTCTGAAAGTTGGACAATTTTCTACTTCAAATATATTATAATACGTACGTGTATTTCCGAATCTGTAGTCTAGCAATGGGTTAACATTGTCACATAAAATTGCATGTTATGGGAAAAACCGGACATTCTATATTTTACTAAACTGTACGGTTGCTTCAACCCATATTGTTTAAAAGCAATGCGATTAAATCAAAGGAAAGCATGGTCAAATGGGAATTCAAATATATAAGTTAAAGAGGAAAAAACAAAACCATTTTATAAACTCGTATATATTTCACTATTAATGAAATAAAAAATAAGCAGTCTCACATGCCCACACACCCCATGACTTGGCAATGCTACACATTACAAATGGCAGAGTACCCTTTATATACCCTTAATAATAACATGGACATAACCACAAAAATCAAAGTACTGAAGTACTTAAACCCGGACTCTTTCGGTGTAATTATGCACATAGTGTAGTTAAATATTCTCTCTCTCTCTCTCTCTCTCTCTTATAATGCTTTGTAAAGAAGTTCATTGAATTAAGAAGAAATTAAACAATAGGAAATAAAAGGAAATTTTAAAAGTTCAGCATGATTCTAAGTTGCTCATGGCTACAGAAAAATGTATAAACTTACCTTTTGATGATGTAACGAAAGCACAGACTGCAAATGTAAAGATTACACCCTTCATTCTATATGAATGTCTATAACTAGGAAGACAGGCTATTTAAATACAAACCTGTTTATGTGATTTTATTTTTAGAAATAACTAAAGTTAGAGCAATACGCAAGTTAAACATGTGGGTGATAGAAATGCATGCAAAACACAGTTACGTCTCTTCAAGCCTTATCAGGCCTTCTCGGCAAAGTTTAGAAAGCATTTTAAATGCATCTGGCATATTCACTTGGTTTTACTTTCCACCTCTTGTATACTGACTGTTGTG
ATACATTGTACAGTTTAAAATTTAGATAAACTTTTCTTATTATTTGGACTTGATTTCATTTCCAAGAATAATTCTATCCTGTTTTGATTAGCACTGCATATTGAATATTTTCACAAAGTGTTTTATTGTTGCGCGTGTATCTATAGTCTGTTCCAAGTTACACTGACTTTTTTTAATGATTTTGAACAAAAACATGTATACACATATACGTATAACAATACAAGTAAAGTAATAAAGCAAACGTTATTGAAATTAATGATAAAAAAGTGCAAAGACTATTTGTAAAAATACATGTAATCATCAAAAGAACACTATACAGTATAGTCATATGCATAAAAAAAATCAATGGTGTTACTCGTTTATTCTGGAATCTGATATGTAAACTTAAAAAGTCATGAATGTTATAAAACTGTTGATTTTTATTGCTTGATTTTTAATTGGACGATAGTATTAACATGCATTATTACTCTGTTATGTTGAGTATTTATTTTATTTATTCATTTGGTCGAAATTTATCGGCATAATTTGTCGTTATTTTCAAATTTCTGGCGGCATTATTATTCAGTGTTTTAAAATGATTATCACATGACCAA
!/Users/sr320/Desktop/pl.pl /Volumes/web/cnidarian/oyster.v9.fa /Volumes/web/cnidarian/v9_multi
SplitMultiFasta ------------------------------------------------- Splits multi fasta into single sequences Usage: splitMultiFasta.pl InputFile OutputDirectory ------------------------------------------------- ^C
!head C8443.fna
>C8443 CTGGCATCTGGCAAGTTTCTGTAAAAGTAATTTGCAGTACATGCATTGCAAGTACTGGTAATGCACAGACCTTATTGTGATTATGGTAAATGTACTATCGTCCTGTTTAATACAGTATAGTACGCATCCTGCCTATGAC
!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar \
-r /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid \
-m /Users/sr320/Desktop/103B_251_02_mappingfile.txt \
-g /Volumes/web/cnidarian/ets_v9_f.gff \
-c /Users/sr320/Desktop/multi_fasta \
-o /Users/sr320/Desktop/Cg_samifier_out20140220 \
-l /Users/sr320/Desktop/log_20140220 \
-b /Users/sr320/Desktop/Cg_20140220.bed
Start document End document! Run exception thrown java.lang.NumberFormatException: null at java.lang.Integer.parseInt(Integer.java:454) at java.lang.Integer.parseInt(Integer.java:527) at au.org.intersect.samifier.parser.mzidentml.MzidReader.build(MzidReader.java:113) at au.org.intersect.samifier.parser.mzidentml.MzidReader.processEvidence(MzidReader.java:137) at au.org.intersect.samifier.parser.mzidentml.MzidReader.run(MzidReader.java:54) at au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseMascotPeptideSearchResultsMzidentMLFormat(PeptideSearchResultsParserImpl.java:129) at au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:79) at au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:56) at au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:90) at au.org.intersect.samifier.Samifier.main(Samifier.java:125) Version = 1.0.9 java.lang.NullPointerException usage: samifier [-b <bedFile>] -c <chromosomeDir> -g <genomeFile> [-l <logFile>] -m <mappingFile> -o <outputFile> -r <searchResultsFile> [-s <Confidence Score thresold>] -b <bedFile> Filename to write IGV regions of interest (BED) file to -c <chromosomeDir> Directory containing the chromosome files in FASTA format for the given genome -g <genomeFile> Genome file in gff format -l <logFile> Filename to write the log into -m <mappingFile> File mapping protein identifier to ordered locus name -o <outputFile> Filename to write the SAM format file to -r <searchResultsFile> Mascot search results file in txt format -s <Confidence Score thresold> Minimum confidence score for peptides to be included java.lang.NullPointerException at java.util.ArrayList.addAll(ArrayList.java:559) at au.org.intersect.samifier.parser.PeptideSearchResultsParserImpl.parseResults(PeptideSearchResultsParserImpl.java:56) at 
au.org.intersect.samifier.runner.SamifierRunner.run(SamifierRunner.java:90) at au.org.intersect.samifier.Samifier.main(Samifier.java:125)
java -jar samifier.jar \
-c cconcisus_ref/genome_dir \
-r Mascot_Result/Glimmer/F188715.dat \
-g pipeline_glimmer.gff \
-m pipeline_glimmer.accession \
-l pipeline_glimmer.log \
-o pipeline_glimmer.sam \
-b pipeline_glimmer.bed
!java -jar /Users/sr320/Desktop/ap11_samifier-master/dist/samifier.jar \
-c /Users/sr320/Desktop/wiki_test_cases/cconcisus_ref/genome_dir \
-r /Users/sr320/Desktop/wiki_test_cases/Mascot_Result/Glimmer/F188715.dat \
-g /Users/sr320/Desktop/wiki_test_cases/pipeline_glimmer.gff \
-m /Users/sr320/Desktop/wiki_test_cases/pipeline_glimmer.accession \
-l /Users/sr320/Desktop/pipeline_glimmer.log \
-o /Users/sr320/Desktop/pipeline_glimmer.sam \
-b /Users/sr320/Desktop/pipeline_glimmer.bed
!head /Users/sr320/Desktop/wiki_test_cases/pipeline_glimmer.accession
orf00002 orf00002 orf00002 orf00003 orf00003 orf00003 orf00004 orf00004 orf00004 orf00005 orf00005 orf00005 orf00006 orf00006 orf00006 orf00007 orf00007 orf00007 orf00008 orf00008 orf00008 orf00009 orf00009 orf00009 orf00011 orf00011 orf00011 orf00012 orf00012 orf00012
!head 103B_251_02_mappingfile.txt
CGI_10000050 CGI_10000050 CGI_10000050 CGI_10000055 CGI_10000055 CGI_10000055 CGI_10000067 CGI_10000067 CGI_10000067 CGI_10000075 CGI_10000075 CGI_10000075 CGI_10000077 CGI_10000077 CGI_10000077 CGI_10000174 CGI_10000174 CGI_10000174 CGI_10000235 CGI_10000235 CGI_10000235 CGI_10000237 CGI_10000237 CGI_10000237 CGI_10000384 CGI_10000384 CGI_10000384 CGI_10000492 CGI_10000492 CGI_10000492
!head -50 /Volumes/web/oyster/bioinformatics/xml_files/interact-20120821_103B_251_QE_02.pep.mzid
<?xml version="1.0" encoding="ISO-8859-1"?> <MzIdentML id="" creationDate="2012-09-20T03:13:18" version="1.1.0" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://psidev.info/files/mzIdentML1.1.0.xsd" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> <cvList> <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="3.53.0" uri="http://psidev.cvs.sourceforge.net/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo"/> <cv id="UNIMOD" fullName="UNIMOD" version="2013-05-31" uri="http://www.unimod.org/obo/unimod.obo"/> <cv id="UO" fullName="Unit Ontology" version="12:10:2011" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo"/> </cvList> <AnalysisSoftwareList> <AnalysisSoftware id="AS_peptideprophet_" name="peptideprophet"> <SoftwareName><cvParam cvRef="MS" accession="MS:1001456" name="analysis software" value="peptideprophet"/></SoftwareName> </AnalysisSoftware> <AnalysisSoftware id="AS_database_refresh_" name="database_refresh"> <SoftwareName><cvParam cvRef="MS" accession="MS:1001456" name="analysis software" value="database_refresh"/></SoftwareName> </AnalysisSoftware> <AnalysisSoftware id="AS_interact_" name="interact"> <SoftwareName><cvParam cvRef="MS" accession="MS:1001456" name="analysis software" value="interact"/></SoftwareName> </AnalysisSoftware> <AnalysisSoftware id="AS_Sequest" name="Sequest"> <SoftwareName><cvParam cvRef="MS" accession="MS:1001208" name="SEQUEST" value=""/></SoftwareName> </AnalysisSoftware> <AnalysisSoftware id="pwiz_3.0.5519" name="ProteoWizard MzIdentML" version="3.0.5519"> <ContactRole contact_ref="ORG_PWIZ"> <Role><cvParam cvRef="MS" accession="MS:1001267" name="software vendor" value=""/></Role> </ContactRole> <SoftwareName><cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard software" value=""/></SoftwareName> </AnalysisSoftware> </AnalysisSoftwareList> <AuditCollection> 
<Organization id="ORG_PWIZ" name="ProteoWizard"> <cvParam cvRef="MS" accession="MS:1000589" name="contact email" value="support@proteowizard.org"/> </Organization> </AuditCollection> <SequenceCollection> <DBSequence id="DBSeq_CGI_10027476" accession="CGI_10027476" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10022468" accession="CGI_10022468" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008057" accession="CGI_10008057" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008062" accession="CGI_10008062" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008068" accession="CGI_10008068" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008073" accession="CGI_10008073" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008078" accession="CGI_10008078" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10008083" accession="CGI_10008083" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10011811" accession="CGI_10011811" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10028133" accession="CGI_10028133" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10014966" accession="CGI_10014966" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10024399" accession="CGI_10024399" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10020326" accession="CGI_10020326" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10024572" accession="CGI_10024572" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10024579" accession="CGI_10024579" 
searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10021674" accession="CGI_10021674" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/> <DBSequence id="DBSeq_CGI_10003490" accession="CGI_10003490" searchDatabase_ref="oyster.v9.glean.final.rename.gff.pep"/>
!head -50 /Users/sr320/Desktop/wiki_test_cases/Mascot_Result/Glimmer/F188715.dat
MIME-Version: 1.0 (Generated by Mascot version 1.0) Content-Type: multipart/mixed; boundary=gc0p4Jq0M2Yt08jU534c0p --gc0p4Jq0M2Yt08jU534c0p Content-Type: application/x-Mascot; name="parameters" LICENSE=Licensed to: Australian Proteomics Computational Facility, (51 processors). MP= NM= COM=Submitted from Test virtual protein by Mascot Daemon on WIN-V0L2R2U0DVL IATOL= IA2TOL= IASTOL= IBTOL= IB2TOL= IBSTOL= IYTOL= IY2TOL= IYSTOL= SEG= SEGT= SEGTU= LTOL= TOL=4 TOLU=ppm ITH= ITOL=0.4 ITOLU=Da PFA=1 DB=contaminants DB2=Campylobacter_PR MODS= MASS=Monoisotopic CLE=Trypsin FILE=C:\Users\ignatius\Documents\PostDoc\2012\ANDS data integration project\Sample Data\Cconcisus Reference proteome\RawSpectra\BAA-1457 Reference\mzML\Nadeem_29_7_10_1.mzML PEAK= QUE= TWO= SEARCH=MIS USERNAME=Carlos Aya USEREMAIL=carlos@intersect.org.au CHARGE=2+ and 3+ INTERMEDIATE= REPORT=AUTO OVERVIEW= FORMAT=mzML (.mzML) FORMVER=1.01 FRAG= IT_MODS=Carbamidomethyl (C),Oxidation (M) USER00=
!head /Users/sr320/Desktop/wiki_test_cases/pipeline_glimmer.accession
orf00002 orf00002 orf00002 orf00003 orf00003 orf00003 orf00004 orf00004 orf00004 orf00005 orf00005 orf00005 orf00006 orf00006 orf00006 orf00007 orf00007 orf00007 orf00008 orf00008 orf00008 orf00009 orf00009 orf00009 orf00011 orf00011 orf00011 orf00012 orf00012 orf00012
!wget http://eagle.fish.washington.edu/oyster/bioinformatics/oyster.v9.glean.final.rename.gff
--2014-03-12 09:08:26-- http://eagle.fish.washington.edu/oyster/bioinformatics/oyster.v9.glean.final.rename.gff Resolving eagle.fish.washington.edu... 128.95.149.81 Connecting to eagle.fish.washington.edu|128.95.149.81|:80... connected. HTTP request sent, awaiting response... 200 OK Length: 14179523 (14M) [text/plain] Saving to: `oyster.v9.glean.final.rename.gff' 100%[======================================>] 14,179,523 45.9M/s in 0.3s 2014-03-12 09:08:26 (45.9 MB/s) - `oyster.v9.glean.final.rename.gff' saved [14179523/14179523]
# Change the notebook's working directory to Desktop (explicit form of the automagic `cd`).
%cd Desktop
/Users/sr320/Desktop
# Preview the first 10 lines (head's default count, spelled out) of ets_v9_f.gff.
!head -n 10 /Volumes/web/cnidarian/ets_v9_f.gff
C16582 GLEAN CDS 35 385 0.555898 - . Parent=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 ID=CGI_10000001; C17212 GLEAN CDS 31 363 0.999572 + . Parent=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 ID=CGI_10000002; C17316 GLEAN CDS 30 257 0.555898 + . Parent=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 ID=CGI_10000003; C17998 GLEAN CDS 196 387 1 - . Parent=CGI_10000005; C17998 GLEAN exon 196 387 . - 0 ID=CGI_10000005; C18346 GLEAN CDS 174 551 1 + . Parent=CGI_10000009; C18346 GLEAN exon 174 551 . + 0 ID=CGI_10000009;
# Preview the first 10 lines (head's default count, spelled out) of ets_v9_g.gff.
!head -n 10 /Volumes/web/cnidarian/ets_v9_g.gff
C16582 GLEAN CDS 35 385 0.555898 - . Parent=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 ID=CGI_10000001; C16582 GLEAN gene 35 385 0.555898 - . Name=CGI_10000001; C17212 GLEAN CDS 31 363 0.999572 + . Parent=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 ID=CGI_10000002; C17212 GLEAN gene 31 363 0.999572 + . Name=CGI_10000002; C17316 GLEAN CDS 30 257 0.555898 + . Parent=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 ID=CGI_10000003; C17316 GLEAN gene 30 257 0.555898 + . Name=CGI_10000003; C17998 GLEAN CDS 196 387 1 - . Parent=CGI_10000005;
# Preview the first 10 lines (head's default count, spelled out) of ets_cgigas_v9_11.gff.
!head -n 10 /Volumes/web/cnidarian/ets_cgigas_v9_11.gff
C16582 GLEAN mRNA 35 385 0.555898 - . ID=CGI_10000001; C16582 GLEAN CDS 35 385 . - 0 Parent=CGI_10000001; C16582 GLEAN gene 35 385 . - 0 Name=CGI_10000001; C17212 GLEAN mRNA 31 363 0.999572 + . ID=CGI_10000002; C17212 GLEAN CDS 31 363 . + 0 Parent=CGI_10000002; C17212 GLEAN gene 31 363 . + 0 Name=CGI_10000002; C17316 GLEAN mRNA 30 257 0.555898 + . ID=CGI_10000003; C17316 GLEAN CDS 30 257 . + 0 Parent=CGI_10000003; C17316 GLEAN gene 30 257 . + 0 Name=CGI_10000003; C17476 GLEAN mRNA 34 257 0.998947 - . ID=CGI_10000004;
# Rename the ID= attribute key to Parent= in ets_cgigas_v9_11.gff, writing ets_cgigas_v9_12.gff.
# The pattern includes the "=" so that only the attribute key is rewritten, not any
# other "ID" substring that may occur in a sequence name or attribute value.
!sed 's/ID=/Parent=/g' < /Volumes/web/cnidarian/ets_cgigas_v9_11.gff > /Volumes/web/cnidarian/ets_cgigas_v9_12.gff
# Check the result: first 10 lines (head's default count, spelled out) of ets_cgigas_v9_12.gff.
!head -n 10 /Volumes/web/cnidarian/ets_cgigas_v9_12.gff
C16582 GLEAN mRNA 35 385 0.555898 - . Parent=CGI_10000001; C16582 GLEAN CDS 35 385 . - 0 Parent=CGI_10000001; C16582 GLEAN gene 35 385 . - 0 Name=CGI_10000001; C17212 GLEAN mRNA 31 363 0.999572 + . Parent=CGI_10000002; C17212 GLEAN CDS 31 363 . + 0 Parent=CGI_10000002; C17212 GLEAN gene 31 363 . + 0 Name=CGI_10000002; C17316 GLEAN mRNA 30 257 0.555898 + . Parent=CGI_10000003; C17316 GLEAN CDS 30 257 . + 0 Parent=CGI_10000003; C17316 GLEAN gene 30 257 . + 0 Name=CGI_10000003; C17476 GLEAN mRNA 34 257 0.998947 - . Parent=CGI_10000004;
# Relabel CDS features as mRNA in ets_v9_h.gff, writing ets_v9_i.gff.
# The substitution is anchored to the third whitespace-separated field (the feature
# type) so that a "CDS" substring elsewhere on the line is never touched.
# -E selects extended regular expressions (accepted by both BSD and GNU sed).
# NOTE: the input ets_v9_h.gff is produced by the ID->Parent sed step on ets_v9_g.gff,
# so that cell has to be run before this one.
!sed -E 's/^([^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]+)CDS([[:space:]])/\1mRNA\2/' < /Volumes/web/cnidarian/ets_v9_h.gff > /Volumes/web/cnidarian/ets_v9_i.gff
# Check the result: first 10 lines (head's default count, spelled out) of ets_v9_i.gff.
!head -n 10 /Volumes/web/cnidarian/ets_v9_i.gff
C16582 GLEAN mRNA 35 385 0.555898 - . Parent=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 Parent=CGI_10000001; C16582 GLEAN gene 35 385 0.555898 - . Name=CGI_10000001; C17212 GLEAN mRNA 31 363 0.999572 + . Parent=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 Parent=CGI_10000002; C17212 GLEAN gene 31 363 0.999572 + . Name=CGI_10000002; C17316 GLEAN mRNA 30 257 0.555898 + . Parent=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 Parent=CGI_10000003; C17316 GLEAN gene 30 257 0.555898 + . Name=CGI_10000003; C17998 GLEAN mRNA 196 387 1 - . Parent=CGI_10000005;
# Rename the ID= attribute key to Parent= in ets_v9_g.gff, writing ets_v9_h.gff
# (the file the CDS->mRNA step reads).
# The pattern includes the "=" so that only the attribute key is rewritten, not any
# other "ID" substring that may occur in a sequence name or attribute value.
!sed 's/ID=/Parent=/g' < /Volumes/web/cnidarian/ets_v9_g.gff > /Volumes/web/cnidarian/ets_v9_h.gff
# Show the first 50 lines of ets_v9_f.gff.
# `-n 50` is the POSIX spelling; the bare `-50` form is obsolescent.
!head -n 50 /Volumes/web/cnidarian/ets_v9_f.gff
C16582 GLEAN CDS 35 385 0.555898 - . Parent=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 ID=CGI_10000001; C17212 GLEAN CDS 31 363 0.999572 + . Parent=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 ID=CGI_10000002; C17316 GLEAN CDS 30 257 0.555898 + . Parent=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 ID=CGI_10000003; C17998 GLEAN CDS 196 387 1 - . Parent=CGI_10000005; C17998 GLEAN exon 196 387 . - 0 ID=CGI_10000005; C18346 GLEAN CDS 174 551 1 + . Parent=CGI_10000009; C18346 GLEAN exon 174 551 . + 0 ID=CGI_10000009; C18428 GLEAN CDS 286 546 0.555898 - . Parent=CGI_10000010; C18428 GLEAN exon 286 546 . - 0 ID=CGI_10000010; C18964 GLEAN CDS 203 658 0.999572 - . Parent=CGI_10000011; C18964 GLEAN exon 203 658 . - 0 ID=CGI_10000011; C18980 GLEAN CDS 30 674 0.555898 + . Parent=CGI_10000012; C18980 GLEAN exon 30 674 . + 0 ID=CGI_10000012; C19100 GLEAN CDS 160 681 0.999955 - . Parent=CGI_10000013; C19100 GLEAN exon 160 681 . - 0 ID=CGI_10000013; C19356 GLEAN CDS 355 597 1 + . Parent=CGI_10000014; C19356 GLEAN exon 355 597 . + 0 ID=CGI_10000014; C19392 GLEAN CDS 46 610 1 + . Parent=CGI_10000015; C19392 GLEAN exon 46 183 . + 0 ID=CGI_10000015; C19392 GLEAN exon 452 610 . + 0 ID=CGI_10000015; C19510 GLEAN CDS 451 702 1 + . Parent=CGI_10000016; C19510 GLEAN exon 451 702 . + 0 ID=CGI_10000016; C19532 GLEAN CDS 155 601 0.575455 - . Parent=CGI_10000017; C19532 GLEAN exon 155 601 . - 0 ID=CGI_10000017; C19570 GLEAN CDS 208 411 0.555898 - . Parent=CGI_10000018; C19570 GLEAN exon 208 411 . - 0 ID=CGI_10000018; C19626 GLEAN CDS 347 814 0.999572 - . Parent=CGI_10000019; C19626 GLEAN exon 347 814 . - 0 ID=CGI_10000019; C19672 GLEAN CDS 132 464 1 + . Parent=CGI_10000021; C19672 GLEAN exon 132 464 . + 0 ID=CGI_10000021; C20188 GLEAN CDS 437 967 0.999572 - . Parent=CGI_10000024; C20188 GLEAN exon 437 967 . - 0 ID=CGI_10000024; C20262 GLEAN CDS 222 1005 1 - . Parent=CGI_10000025; C20262 GLEAN exon 872 1005 . - 0 ID=CGI_10000025; C20262 GLEAN exon 642 649 . 
- 1 ID=CGI_10000025; C20262 GLEAN exon 222 538 . - 2 ID=CGI_10000025; C20282 GLEAN CDS 330 980 1 + . Parent=CGI_10000026; C20282 GLEAN exon 330 980 . + 0 ID=CGI_10000026; scaffold1224 GLEAN CDS 107 775 0.639435 - . Parent=CGI_10000027; scaffold1224 GLEAN exon 107 775 . - 0 ID=CGI_10000027; C20334 GLEAN CDS 273 1027 1 - . Parent=CGI_10000028; C20334 GLEAN exon 868 1027 . - 0 ID=CGI_10000028; C20334 GLEAN exon 273 523 . - 2 ID=CGI_10000028; C20412 GLEAN CDS 73 776 1 - . Parent=CGI_10000029; C20412 GLEAN exon 706 776 . - 0 ID=CGI_10000029; C20412 GLEAN exon 410 463 . - 1 ID=CGI_10000029; C20412 GLEAN exon 73 214 . - 1 ID=CGI_10000029;
# Relabel CDS features as gene in ets_v9_f.gff, writing ets_v9_j.gff.
# The substitution is anchored to the third whitespace-separated field (the feature
# type) so that a "CDS" substring elsewhere on the line is never touched.
# -E selects extended regular expressions (accepted by both BSD and GNU sed).
!sed -E 's/^([^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]+)CDS([[:space:]])/\1gene\2/' < /Volumes/web/cnidarian/ets_v9_f.gff > /Volumes/web/cnidarian/ets_v9_j.gff
# Rename the Parent= attribute key to Name= in ets_v9_j.gff, writing ets_v9_k.gff.
# The pattern includes the "=" so that only the attribute key is rewritten, not a
# "Parent" substring that might appear inside an attribute value.
!sed 's/Parent=/Name=/g' < /Volumes/web/cnidarian/ets_v9_j.gff > /Volumes/web/cnidarian/ets_v9_k.gff
# Check the result: first 10 lines (head's default count, spelled out) of ets_v9_k.gff.
!head -n 10 /Volumes/web/cnidarian/ets_v9_k.gff
C16582 GLEAN gene 35 385 0.555898 - . Name=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 ID=CGI_10000001; C17212 GLEAN gene 31 363 0.999572 + . Name=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 ID=CGI_10000002; C17316 GLEAN gene 30 257 0.555898 + . Name=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 ID=CGI_10000003; C17998 GLEAN gene 196 387 1 - . Name=CGI_10000005; C17998 GLEAN exon 196 387 . - 0 ID=CGI_10000005; C18346 GLEAN gene 174 551 1 + . Name=CGI_10000009; C18346 GLEAN exon 174 551 . + 0 ID=CGI_10000009;
# Rename the ID= attribute key to Parent= in ets_v9_k.gff, writing ets_v9_l.gff.
# The pattern includes the "=" so that only the attribute key is rewritten, not any
# other "ID" substring that may occur in a sequence name or attribute value.
!sed 's/ID=/Parent=/g' < /Volumes/web/cnidarian/ets_v9_k.gff > /Volumes/web/cnidarian/ets_v9_l.gff
# Check the result: first 10 lines (head's default count, spelled out) of ets_v9_l.gff.
!head -n 10 /Volumes/web/cnidarian/ets_v9_l.gff
C16582 GLEAN gene 35 385 0.555898 - . Name=CGI_10000001; C16582 GLEAN exon 35 385 . - 0 Parent=CGI_10000001; C17212 GLEAN gene 31 363 0.999572 + . Name=CGI_10000002; C17212 GLEAN exon 31 363 . + 0 Parent=CGI_10000002; C17316 GLEAN gene 30 257 0.555898 + . Name=CGI_10000003; C17316 GLEAN exon 30 257 . + 0 Parent=CGI_10000003; C17998 GLEAN gene 196 387 1 - . Name=CGI_10000005; C17998 GLEAN exon 196 387 . - 0 Parent=CGI_10000005; C18346 GLEAN gene 174 551 1 + . Name=CGI_10000009; C18346 GLEAN exon 174 551 . + 0 Parent=CGI_10000009;
# Relabel exon features as CDS in ets_v9_l.gff, writing ets_v9_m.gff.
# The substitution is anchored to the third whitespace-separated field (the feature
# type); an unanchored s/exon/CDS/g would also rewrite identifiers that contain
# "exon" (e.g. attribute values of the form ID=exon:...).
# -E selects extended regular expressions (accepted by both BSD and GNU sed).
!sed -E 's/^([^[:space:]]+[[:space:]]+[^[:space:]]+[[:space:]]+)exon([[:space:]])/\1CDS\2/' < /Volumes/web/cnidarian/ets_v9_l.gff > /Volumes/web/cnidarian/ets_v9_m.gff
# Show the last 50 lines of ets_v9_o.gff.
# `-n 50` is the POSIX spelling; the bare `-50` form is obsolescent.
!tail -n 50 /Volumes/web/cnidarian/ets_v9_o.gff
scaffold998 GLEAN CDS 116508 117557 . - 0 Parent=CGI_10006465; scaffold998 GLEAN mRNA 3874 24837 0.410057 + . Parent=CGI_10006458; scaffold998 GLEAN mRNA 25310 28141 0.813029 + . Parent=CGI_10006459; scaffold998 GLEAN mRNA 43399 61271 0.812368 + . Parent=CGI_10006460; scaffold998 GLEAN mRNA 67122 73547 0.999674 + . Parent=CGI_10006461; scaffold998 GLEAN mRNA 73620 80841 0.721465 + . Parent=CGI_10006462; scaffold998 GLEAN mRNA 89231 90958 0.999202 + . Parent=CGI_10006463; scaffold998 GLEAN mRNA 91399 104169 0.99895 - . Parent=CGI_10006464; scaffold998 GLEAN mRNA 116508 131912 0.996071 - . Parent=CGI_10006465; scaffold999 GLEAN gene 23147 24888 0.654733 + . Name=CGI_10006969; scaffold999 GLEAN CDS 23147 23288 . + 0 Parent=CGI_10006969; scaffold999 GLEAN CDS 24070 24204 . + 2 Parent=CGI_10006969; scaffold999 GLEAN CDS 24468 24581 . + 2 Parent=CGI_10006969; scaffold999 GLEAN CDS 24695 24888 . + 2 Parent=CGI_10006969; scaffold999 GLEAN gene 39254 45360 1 - . Name=CGI_10006970; scaffold999 GLEAN CDS 45195 45360 . - 0 Parent=CGI_10006970; scaffold999 GLEAN CDS 39995 40053 . - 2 Parent=CGI_10006970; scaffold999 GLEAN CDS 39701 39892 . - 0 Parent=CGI_10006970; scaffold999 GLEAN CDS 39254 39460 . - 0 Parent=CGI_10006970; scaffold999 GLEAN gene 47971 57911 0.937649 - . Name=CGI_10006971; scaffold999 GLEAN CDS 57819 57911 . - 0 Parent=CGI_10006971; scaffold999 GLEAN CDS 56427 56498 . - 0 Parent=CGI_10006971; scaffold999 GLEAN CDS 53710 53847 . - 0 Parent=CGI_10006971; scaffold999 GLEAN CDS 52115 52144 . - 0 Parent=CGI_10006971; scaffold999 GLEAN CDS 51101 51218 . - 0 Parent=CGI_10006971; scaffold999 GLEAN CDS 48506 48655 . - 2 Parent=CGI_10006971; scaffold999 GLEAN CDS 47971 48086 . - 2 Parent=CGI_10006971; scaffold999 GLEAN gene 88395 99702 0.482874 - . Name=CGI_10006972; scaffold999 GLEAN CDS 99690 99702 . - 0 Parent=CGI_10006972; scaffold999 GLEAN CDS 96192 96278 . - 2 Parent=CGI_10006972; scaffold999 GLEAN CDS 95628 95712 . 
- 2 Parent=CGI_10006972; scaffold999 GLEAN CDS 93995 94089 . - 1 Parent=CGI_10006972; scaffold999 GLEAN CDS 88395 88534 . - 2 Parent=CGI_10006972; scaffold999 GLEAN gene 107744 126675 0.391684 + . Name=CGI_10006973; scaffold999 GLEAN CDS 107744 107798 . + 0 Parent=CGI_10006973; scaffold999 GLEAN CDS 115160 115384 . + 2 Parent=CGI_10006973; scaffold999 GLEAN CDS 116967 117221 . + 2 Parent=CGI_10006973; scaffold999 GLEAN CDS 118234 118362 . + 2 Parent=CGI_10006973; scaffold999 GLEAN CDS 120708 120818 . + 2 Parent=CGI_10006973; scaffold999 GLEAN CDS 121053 121184 . + 2 Parent=CGI_10006973; scaffold999 GLEAN CDS 122398 122535 . + 2 Parent=CGI_10006973; scaffold999 GLEAN CDS 123424 123555 . + 2 Parent=CGI_10006973; scaffold999 GLEAN CDS 124859 124996 . + 2 Parent=CGI_10006973; scaffold999 GLEAN CDS 126012 126143 . + 2 Parent=CGI_10006973; scaffold999 GLEAN CDS 126617 126675 . + 2 Parent=CGI_10006973; scaffold999 GLEAN mRNA 23147 24888 0.654733 + . Parent=CGI_10006969; scaffold999 GLEAN mRNA 39254 45360 1 - . Parent=CGI_10006970; scaffold999 GLEAN mRNA 47971 57911 0.937649 - . Parent=CGI_10006971; scaffold999 GLEAN mRNA 88395 99702 0.482874 - . Parent=CGI_10006972; scaffold999 GLEAN mRNA 107744 126675 0.391684 + . Parent=CGI_10006973;
# Preview the first 10 lines (head's default count, spelled out) of the "fixed" oyster v9 GFF.
!head -n 10 /Volumes/web/oyster/bioinformatics/oyster.v9.glean.final.rename.fixed.gff
C16582 GLEAN mRNA 35 385 0.555898 - . ID=CGI_10000001; C16582 GLEAN CDS 35 385 . - 0 Parent=CGI_10000001; C17212 GLEAN mRNA 31 363 0.999572 + . ID=CGI_10000002; C17212 GLEAN CDS 31 363 . + 0 Parent=CGI_10000002; C17316 GLEAN mRNA 30 257 0.555898 + . ID=CGI_10000003; C17316 GLEAN CDS 30 257 . + 0 Parent=CGI_10000003; C17998 GLEAN mRNA 196 387 1 - . ID=CGI_10000005; C17998 GLEAN CDS 196 387 . - 0 Parent=CGI_10000005; C18346 GLEAN mRNA 174 551 1 + . ID=CGI_10000009; C18346 GLEAN CDS 174 551 . + 0 Parent=CGI_10000009;
# Preview the first 10 lines (head's default count, spelled out) of ets_v9_p.gff.
!head -n 10 /Volumes/web/cnidarian/ets_v9_p.gff
C16582 GLEAN mRNA 35 385 0.555898 - . Parent=CGI_10000001; C16582 GLEAN gene 35 385 . - . Name=CGI_10000001; C16582 GLEAN CDS 35 385 . - 0 Parent=CGI_10000001; C17212 GLEAN mRNA 31 363 0.999572 + . Parent=CGI_10000002; C17212 GLEAN gene 31 363 . + . Name=CGI_10000002; C17212 GLEAN CDS 31 363 . + 0 Parent=CGI_10000002; C17316 GLEAN mRNA 30 257 0.555898 + . Parent=CGI_10000003; C17316 GLEAN gene 30 257 . + . Name=CGI_10000003; C17316 GLEAN CDS 30 257 . + 0 Parent=CGI_10000003; C17998 GLEAN mRNA 196 387 1 - . Parent=CGI_10000005;
# Preview the first 10 lines (head's default count, spelled out) of the Crassostrea_gigas GFF3 file.
!head -n 10 /Volumes/web/cnidarian/Crassostrea_gigas.GCA_000297895.1.21.gff3
##gff-version 3 scaffold1611 protein_coding gene 1263 9963 . - . ID=CGI_10014322;Name=CGI_10014322 scaffold1611 protein_coding mRNA 1263 9963 . - . ID=EKC25967;Parent=CGI_10014322 scaffold1611 protein_coding start_codon 9961 9963 . - 0 ID=start_codon:EKC25967:1;Parent=EKC25967 scaffold1611 protein_coding exon 9922 9963 . - . ID=exon:EKC25967:1;Parent=EKC25967 scaffold1611 protein_coding exon 8502 8667 . - . ID=exon:EKC25967:2;Parent=EKC25967 scaffold1611 protein_coding exon 7374 7534 . - . ID=exon:EKC25967:3;Parent=EKC25967 scaffold1611 protein_coding exon 3861 4046 . - . ID=exon:EKC25967:4;Parent=EKC25967 scaffold1611 protein_coding exon 1635 1742 . - . ID=exon:EKC25967:5;Parent=EKC25967 scaffold1611 protein_coding exon 1263 1268 . - . ID=exon:EKC25967:6;Parent=EKC25967
# Preview the first 10 lines (head's default count, spelled out) of the glimmer test-case GFF.
!head -n 10 /Volumes/web/whale/wiki_test_cases/pipeline_glimmer.gff
##gff-version 3 NC_009802 Glimmer gene 1 1311 16.88 + 0 Name=orf00002;ID=orf00002; NC_009802 Glimmer CDS 1 1311 16.88 + 0 Name=orf00002;Parent=orf00002; NC_009802 Glimmer gene 1319 1465 1.61 + 0 Name=orf00003;ID=orf00003; NC_009802 Glimmer CDS 1319 1465 1.61 + 0 Name=orf00003;Parent=orf00003; NC_009802 Glimmer gene 1465 2532 15.89 + 0 Name=orf00004;ID=orf00004; NC_009802 Glimmer CDS 1465 2532 15.89 + 0 Name=orf00004;Parent=orf00004; NC_009802 Glimmer gene 2550 4859 16.58 + 0 Name=orf00005;ID=orf00005; NC_009802 Glimmer CDS 2550 4859 16.58 + 0 Name=orf00005;Parent=orf00005; NC_009802 Glimmer gene 4878 5474 16.37 + 0 Name=orf00006;ID=orf00006;