import avro.protocol as avpr
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter

# Parse the ADAM Avro protocol definition. The file is opened in a context
# manager so the handle is closed as soon as the text has been read.
with open("adam.avpr", "r") as protocol_file:
    ADAM_formats = avpr.parse(protocol_file.read())

# List every type declared by the protocol as (type, name) pairs.
sorted([(v.type, k) for k, v in ADAM_formats.types_dict.items()])

ADAM_contig_schema = ADAM_formats.types_dict['ADAMContig']
ADAM_contig_schema.to_json()

# this is the Python representation of the Avro object.
ADAM_contig = {
    'contigId': 9230,
    'contigName': "1Nabc",
    'contigLength': 7781,
    'contigMD5': "8743b52063cd84097a65d1633f5c74f5",
    'referenceURL': 'http://data.dna/223',
}  # mock data

# Avro object container files are binary, so they are opened in "wb"/"rb"
# mode; text mode can corrupt the stream (newline translation) or fail
# outright on Python 3.
with DataFileWriter(open("contigs.avro", "wb"), DatumWriter(), ADAM_contig_schema) as contig_writer:
    contig_writer.append(ADAM_contig)

with DataFileReader(open("contigs.avro", "rb"), DatumReader()) as contigs:
    for contig in contigs:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print(contig)

# Example aligned-read record expressed as a plain Python dict.
ADAM_record = {
    "referenceName": "20",
    "referenceId": 19,
    "start": 19893804,
    "mapq": 60,
    "readName": "20GAVAAXX100126:4:64:6132:191287",
    "sequence": "GTTTTCTATGAAGTTATTTTCTAGGGATTCTGTTTTGTTGTCGTTGTTCACACTGTAGCTCTCAGATCTTACTGTTTTTTTTTTAATTGTGATAAAGCATA",
    "mateReference": "20",
    "mateAlignmentStart": 19893476,
    "cigar": "101M",
    "qual": "EHHHFGDGHFHF8EEFB=B@=GHFFBAA@8??>IHHEIHEH>EHH@HFG>GEGHEGHFAHHHHHHHHHHHHFHGHGHGHHHHHHF@HFHHHHHHHHHGGGGGGGHHHHHHHHHHHHHHHHHHHGHHH\tMQ:i:60\tBQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\tXO:i:0\tXM:i:0\tSM:i:37\tNM:i:0\tAM:i:37\tXG:i:0\tRG:Z:20GAV.4\tX1:i:0\tX0:i:1",
    "recordGroupSequencingCenter": "BI",
    "recordGroupDescription": None,
    "recordGroupRunDateEpoch": None,
    "recordGroupFlowOrder": None,
    "recordGroupKeySequence": None,
    "recordGroupLibrary": "Solexa-18484",
    "recordGroupPredictedMedianInsertSize": None,
    "recordGroupPlatform": "illumina",
    "recordGroupPlatformUnit": "20GAVAAXX100126.4",
    "recordGroupSample": "NA12878",
    "mateReferenceId": 19,
    "referenceLength": 63025520,
    "referenceUrl": None,
    "mateReferenceLength": 63025520,
    "mateReferenceUrl": None,
    "origQual": None,
}
# Sample SAM record, kept as a comment because it is data, not Python. It
# carries the same read name as the ADAM_record dict; its POS (19893805) is
# one more than that record's "start" (19893804) -- presumably 1-based vs
# 0-based coordinates, confirm against the ADAM schema.
#
# 20GAVAAXX100126:4:64:6132:191287 83 20 19893805 60 101M = 19893477 -428 GTTTTCTATGAAGTTATTTTCTAGGGATTCTGTTTTGTTGTCGTTGTTCACACTGTAGCTCTCAGATCTTACTGTTTTTTTTTTAATTGTGATAAAGCATA EHHHFGDGHFHF8EEFB=B@=GHFFBAA@8??>IHHEIHEH>EHH@HFG>GEGHEGHFA

# Sample VCF rows (data, not Python):
#
# #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT GENOTYPE
# chr17 7621777 rs1544724 T G . . . GT 0/1
# chr1 82154 rs4477212 a . . . . GT 1/1
# chr1 752566 rs3094315 g A . . . GT 0/1
# chr1 752721 rs3131972 A G . . . GT 0/0
# chr1 776546 rs12124819 A . . . . GT 0/0

# Dict form of the first VCF row above (chr17, T -> G). "position" is
# 7621776 while the VCF POS is 7621777 -- presumably a 0-based coordinate;
# confirm against the ADAM schema.
variant = {
    "contig": {
        "contigId": 0,
        "contigName": "chr17",
        "contigLength": None,
        "contigMD5": None,
        "referenceURL": None,
    },
    "position": 7621776,
    "referenceAllele": "T",
    "variantAllele": "G",
}

ADAM_variant_schema = ADAM_formats.types_dict['ADAMVariant']
ADAM_variant_schema.to_json()

# Example annotation record with every optional field left unset.
variant_calling_annotation = {
    "readDepth": None,
    "downsampled": None,
    "baseQRankSum": None,
    "clippingRankSum": None,
    "fisherStrandBiasPValue": None,
    "haplotypeScore": None,
    "inbreedingCoefficient": None,
    "alleleCountMLE": [],
    "alleleFrequencyMLE": [],
    "rmsMapQ": None,
    "mapq0Reads": None,
    "mqRankSum": None,
    "usedForNegativeTrainingSet": None,
    "usedForPositiveTrainingSet": None,
    "variantQualityByDepth": None,
    "readPositionRankSum": None,
    "vqslod": None,
    "culprit": None,
    "variantCallErrorProbability": None,
    "variantIsPassing": True,
    "variantFilters": [],
}

ADAM_variant_calling_annotations_schema = ADAM_formats.types_dict['VariantCallingAnnotations']
ADAM_variant_calling_annotations_schema.to_json()

ADAM_nucleotide_contig_fragment_schema = ADAM_formats.types_dict['ADAMNucleotideContigFragment']
ADAM_nucleotide_contig_fragment_schema.to_json()

# Illustrative contig-fragment record (pseudo-notation, not executable: the
# sequence is elided with an ellipsis):
#
# contigName = 20
# contigId = 0
# fragmentSequence = TTCG……………AACCGGCTCGA
# contigLength = 20000000
# fragmentNumber = 4
# fragmentStartPosition = 40000
# numberOfFragmentsInContig = 2000

# Sample pileup data (first row; columns look like contig, position,
# reference base, depth, read bases -- TODO confirm):
#
# 20 9999944 A 35 ,,,....,.,,..,,...,.,,,,,,,.,,.,...
# Sample pileup rows, kept as comments because they are data, not Python.
# Columns look like contig, position, reference base, depth, read bases --
# TODO confirm.
#
# 20 9999945 C 37 ,,,....,.,,..,,...,.,,,,,,,.,,.,....,
# 20 9999946 T 37 ,,,....,.,,..,,...,.,,,,,,,.,,.,....,
# 20 9999947 C 38 ,,,....,.,,..,,...,.,,,,,,,.,,.,....,,
# 20 9999948 T 40 ,,,....,.,,..,,...,.,,,,,,,.,,.,....,,.,
# 20 9999949 T 41 ,,,....,.,,..,,...,.,,,,,,,.,,.,....,,.,.
# 20 9999950 A 42 ,,,....,.,,..,,...,.,,,,,,,.,,.,....,,.,..
# 20 9999951 G 43 ,,,....,.,,..,,...,.,,,,,,,.,,.,....,,.,...
# 20 9999952 T 46 ,,,....,.,,..,,...,.,,,,,,,.,,.,....,,.,.....,

# Look up the remaining schemas in the parsed protocol and render each as
# JSON (the bare .to_json() calls only display a value in an interactive
# session).
ADAM_pileup_schema = ADAM_formats.types_dict['ADAMPileup']
ADAM_pileup_schema.to_json()

ADAM_variant_effect_schema = ADAM_formats.types_dict['VariantEffect']
ADAM_variant_effect_schema.to_json()

ADAM_nested_pileup_schema = ADAM_formats.types_dict['ADAMNestedPileup']
ADAM_nested_pileup_schema.to_json()