You only need to do this once.
# Delete any previous copies of the BAM and its index (-f: no error if the
# file is absent; anything rm writes to stderr is discarded).
!rm -f NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam 2>/dev/null
!rm -f NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai 2>/dev/null
# Download the NA18489 chromosome 20 exome alignment (.bam) and its index
# (.bam.bai) from the 1000 Genomes FTP server into the working directory.
!wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/exome_alignment/NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam
!wget ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/exome_alignment/NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai
--2015-06-26 14:36:30-- ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/exome_alignment/NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam => ‘NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam’ Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8 Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected. Logging in as anonymous ... Logged in! ==> SYST ... done. ==> PWD ... done. ==> TYPE I ... done. ==> CWD (1) /vol1/ftp/phase3/data/NA18489/exome_alignment ... done. ==> SIZE NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam ... 327067172 ==> PASV ... done. ==> RETR NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam ... done. Length: 327067172 (312M) (unauthoritative) NA18489.chrom20.ILL 100%[=====================>] 311.92M 5.02MB/s in 31s 2015-06-26 14:37:02 (9.98 MB/s) - ‘NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam’ saved [327067172] --2015-06-26 14:37:02-- ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA18489/exome_alignment/NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai => ‘NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai’ Resolving ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)... 193.62.192.8 Connecting to ftp.1000genomes.ebi.ac.uk (ftp.1000genomes.ebi.ac.uk)|193.62.192.8|:21... connected. Logging in as anonymous ... Logged in! ==> SYST ... done. ==> PWD ... done. ==> TYPE I ... done. ==> CWD (1) /vol1/ftp/phase3/data/NA18489/exome_alignment ... done. ==> SIZE NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai ... 170688 ==> PASV ... done. ==> RETR NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai ... done. Length: 170688 (167K) (unauthoritative) NA18489.chrom20.ILL 100%[=====================>] 166.69K --.-KB/s in 0.08s 2015-06-26 14:37:02 (2.02 MB/s) - ‘NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam.bai’ saved [170688]
from collections import defaultdict
import numpy as np
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
import pysam
# Open the downloaded alignment file for reading ('rb' = read a BAM file).
# This handle is reused by all the cells below.
# NOTE(review): the region queries further down (bam.fetch) presumably rely on
# the .bam.bai index sitting next to this file - confirm if the path changes.
bam = pysam.AlignmentFile('NA18489.chrom20.ILLUMINA.bwa.YRI.exome.20121211.bam', 'rb')
# Dump the BAM header, one section per record type (SQ, RG, CO, ...).
#
# Most sections are a list of mappings (field -> value), but not all of them:
#   * 'CO' (comment) records are plain strings, so calling .items() on them
#     raised AttributeError in the original version of this cell;
#   * per the pysam docs, 'HD' is a single mapping rather than a list of
#     records, so it is wrapped in a list to be printed like the others.
headers = bam.header
for record_type, records in headers.items():
    print(record_type)
    if hasattr(records, 'items'):
        # A lone mapping: treat it as a one-record section.
        records = [records]
    for i, record in enumerate(records, start=1):
        print('\t%d' % i)
        if not hasattr(record, 'items'):
            # Free-text record (e.g. a CO comment line): print it verbatim.
            print('\t\t%s' % record)
            continue
        for field, value in record.items():
            print('\t\t%s\t%s' % (field, value))
SQ 1 LN 249250621 M5 1b22b98cdeb4a9304cb5d48026a85128 SN 1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 2 LN 243199373 M5 a0d9851da00400dec1098a9255ac712e SN 2 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 3 LN 198022430 M5 fdfd811849cc2fadebc929bb925902e5 SN 3 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 4 LN 191154276 M5 23dccd106897542ad87d2765d28a19a1 SN 4 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 5 LN 180915260 M5 0740173db9ffd264d728f32784845cd7 SN 5 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 6 LN 171115067 M5 1d3a93a248d92a729ee764823acbbc6b SN 6 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 7 LN 159138663 M5 618366e953d6aaad97dbe4777c29375e SN 7 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 8 LN 146364022 M5 96f514a9929e410c6651697bded59aec SN 8 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 9 LN 141213431 M5 3e273117f15e0a400f01055d9f393768 SN 9 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 10 LN 135534747 M5 988c28e000e84c26d552359af1ea2e1d SN 10 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 11 LN 135006516 M5 98c59049a2df285c76ffb1c6db8f8b96 SN 11 UR 
ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 12 LN 133851895 M5 51851ac0e1a115847ad36449b0015864 SN 12 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 13 LN 115169878 M5 283f8d7892baa81b510a015719ca7b0b SN 13 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 14 LN 107349540 M5 98f3cae32b2a2e9524bc19813927542e SN 14 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 15 LN 102531392 M5 e5645a794a8238215b2cd77acb95a078 SN 15 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 16 LN 90354753 M5 fc9b1a7b42b97a864f56b348b06095e6 SN 16 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 17 LN 81195210 M5 351f64d4f4f9ddd45b35336ad97aa6de SN 17 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 18 LN 78077248 M5 b15d4b2d29dde9d3e4f93d1d0f2cbc9c SN 18 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 19 LN 59128983 M5 1aacd71f30db8e561810913e0b72636d SN 19 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 20 LN 63025520 M5 0dec9660ec1efaaf33281c0d5ea2560f SN 20 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 21 LN 48129895 M5 2979a6085bfe28e3ad6f552f361ed74d SN 21 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz 
AS:NCBI37 SP:Human 22 LN 51304566 M5 a718acaa6135fdca8357d5bfe94211dd SN 22 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 23 LN 155270560 M5 7e0e2e580297b7764e31dbc80c2540dd SN X UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 24 LN 59373566 M5 1fa3474750af0948bdf97d5a0ee52e51 SN Y UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 25 LN 16569 M5 c68f52674c9fb33aef52dcf399755519 SN MT UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 26 LN 4262 M5 f3814841f1939d3ca19072d9e89f3fd7 SN GL000207.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 27 LN 15008 M5 1c1b2cd1fccbc0a99b6a447fa24d1504 SN GL000226.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 28 LN 19913 M5 d0f40ec87de311d8e715b52e4c7062e1 SN GL000229.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 29 LN 27386 M5 ba8882ce3a1efa2080e5d29b956568a4 SN GL000231.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 30 LN 27682 M5 851106a74238044126131ce2a8e5847c SN GL000210.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 31 LN 33824 M5 99795f15702caec4fa1c4e15f8a29c07 SN GL000239.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 32 LN 34474 M5 118a25ca210cfbcdfb6c2ebb249f9680 SN GL000235.1 UR 
ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 33 LN 36148 M5 dfb7e7ec60ffdcb85cb359ea28454ee9 SN GL000201.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 34 LN 36422 M5 7de00226bb7df1c57276ca6baabafd15 SN GL000247.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 35 LN 36651 M5 89bc61960f37d94abf0df2d481ada0ec SN GL000245.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 36 LN 37175 M5 6f5efdd36643a9b8c8ccad6f2f1edc7b SN GL000197.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 37 LN 37498 M5 96358c325fe0e70bee73436e8bb14dbd SN GL000203.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 38 LN 38154 M5 e4afcd31912af9d9c2546acf1cb23af2 SN GL000246.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 39 LN 38502 M5 1d78abec37c15fe29a275eb08d5af236 SN GL000249.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 40 LN 38914 M5 d92206d1bb4c3b4019c43c0875c06dc0 SN GL000196.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 41 LN 39786 M5 5a8e43bec9be36c7b49c84d585107776 SN GL000248.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 42 LN 39929 M5 0996b4475f353ca98bacb756ac479140 SN GL000244.1 UR 
ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 43 LN 39939 M5 131b1efc3270cc838686b54e7c34b17b SN GL000238.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 44 LN 40103 M5 06cbf126247d89664a4faebad130fe9c SN GL000202.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 45 LN 40531 M5 93f998536b61a56fd0ff47322a911d4b SN GL000234.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 46 LN 40652 M5 3e06b6741061ad93a8587531307057d8 SN GL000232.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 47 LN 41001 M5 43f69e423533e948bfae5ce1d45bd3f1 SN GL000206.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 48 LN 41933 M5 445a86173da9f237d7bcf41c6cb8cc62 SN GL000240.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 49 LN 41934 M5 fdcd739913efa1fdc64b6c0cd7016779 SN GL000236.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 50 LN 42152 M5 ef4258cdc5a45c206cea8fc3e1d858cf SN GL000241.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 51 LN 43341 M5 cc34279a7e353136741c9fce79bc4396 SN GL000243.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 52 LN 43523 M5 2f8694fc47576bc81b5fe9e7de0ba49e SN GL000242.1 UR 
ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 53 LN 43691 M5 b4eb71ee878d3706246b7c1dbef69299 SN GL000230.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 54 LN 45867 M5 e0c82e7751df73f4f6d0ed30cdc853c0 SN GL000237.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 55 LN 45941 M5 7fed60298a8d62ff808b74b6ce820001 SN GL000233.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 56 LN 81310 M5 efc49c871536fa8d79cb0a06fa739722 SN GL000204.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 57 LN 90085 M5 868e7784040da90d900d2d1b667a1383 SN GL000198.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 58 LN 92689 M5 aa81be49bf3fe63a79bdc6a6f279abf6 SN GL000208.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 59 LN 106433 M5 d75b436f50a8214ee9c2a51d30b2c2cc SN GL000191.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 60 LN 128374 M5 a4aead23f8053f2655e468bcc6ecdceb SN GL000227.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 61 LN 129120 M5 c5a17c97e2c1a0b6a9cc5a6b064b714f SN GL000228.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 62 LN 137718 M5 46c2032c37f2ed899eb41c0473319a69 SN GL000214.1 UR 
ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 63 LN 155397 M5 3238fb74ea87ae857f9c7508d315babb SN GL000221.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 64 LN 159169 M5 f40598e2a5a6b26e84a3775e0d1e2c81 SN GL000209.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 65 LN 161147 M5 1d708b54644c26c7e01c2dad5426d38c SN GL000218.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 66 LN 161802 M5 fc35de963c57bf7648429e6454f1c9db SN GL000220.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 67 LN 164239 M5 9d424fdcc98866650b58f004080a992a SN GL000213.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 68 LN 166566 M5 7daaa45c66b288847b9b32b964e623d3 SN GL000211.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 69 LN 169874 M5 569af3b73522fab4b40995ae4944e78e SN GL000199.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 70 LN 172149 M5 6d243e18dea1945fb7f2517615b8f52e SN GL000217.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 71 LN 172294 M5 642a232d91c486ac339263820aef7fe0 SN GL000216.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 72 LN 172545 M5 5eb3b418480ae67a997957c909375a73 SN GL000215.1 UR 
ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 73 LN 174588 M5 d22441398d99caf673e9afb9a1908ec5 SN GL000205.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 74 LN 179198 M5 f977edd13bac459cb2ed4a5457dba1b3 SN GL000219.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 75 LN 179693 M5 d5b2fc04f6b41b212a4198a07f450e20 SN GL000224.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 76 LN 180455 M5 399dfa03bf32022ab52a846f7ca35b30 SN GL000223.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 77 LN 182896 M5 5d9ec007868d517e73543b005ba48535 SN GL000195.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 78 LN 186858 M5 563531689f3dbd691331fd6c5730a88b SN GL000212.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 79 LN 186861 M5 6fe9abac455169f50470f5a6b01d0f59 SN GL000222.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 80 LN 187035 M5 75e4c8d17cd4addf3917d1703cacaf25 SN GL000200.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 81 LN 189789 M5 dbb6e8ece0b5de29da56601613007c2a SN GL000193.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 82 LN 191469 M5 6ac8f815bf8e845bb3031b73f812c012 SN GL000194.1 UR 
ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 83 LN 211173 M5 63945c3e6962f28ffd469719a747e73c SN GL000225.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 84 LN 547496 M5 325ba9e808f669dfeee210fdd7b470ac SN GL000192.1 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 85 LN 171823 M5 6743bd63b3ff2b5b8985d8933c53290a SN NC_007605 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human 86 LN 35477943 M5 5b6a4b3a81a2d3c134b7d14bf6ad39f1 SN hs37d5 UR ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz AS:NCBI37 SP:Human RG 1 LB Solexa-51039 CN BI DS SRP004074 SM NA18489 PI 220 ID SRR100025 PL ILLUMINA CO 1
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-3-3adad14901f7> in <module>() 4 for i, record in enumerate(records): 5 print('\t%d' % (i + 1)) ----> 6 for field, value in record.items(): 7 print('\t\t%s\t%s' % (field, value)) AttributeError: 'str' object has no attribute 'items'
# Find the first read that is mapped, has a mapped mate, and whose CIGAR
# contains both an aligned (M) and a soft-clipped (S) segment, then print its
# main fields. Coordinates printed below are 0-based.
for rec in bam:
    # Test the mapping flags before touching the CIGAR: pysam returns None for
    # cigarstring when a read carries no CIGAR, and the original
    # rec.cigarstring.find(...) would raise AttributeError on such a read.
    if rec.is_unmapped or rec.mate_is_unmapped:
        continue
    cigar = rec.cigarstring
    if cigar and 'M' in cigar and 'S' in cigar:
        break
# NOTE: if no read satisfies the filter, rec is simply the last read iterated.

# Read name, reference index and name, and aligned span on the reference.
print(rec.query_name, rec.reference_id, bam.getrname(rec.reference_id), rec.reference_start, rec.reference_end)
print(rec.cigarstring)
# Span of the aligned part within the read itself, and its length.
print(rec.query_alignment_start, rec.query_alignment_end, rec.query_alignment_length)
# Mate location and template length.
print(rec.next_reference_id, rec.next_reference_start, rec.template_length)
print(rec.is_paired, rec.is_proper_pair, rec.is_unmapped, rec.mapping_quality)
# Base qualities for the whole read, then for the aligned part only.
print(rec.query_qualities)
print(rec.query_alignment_qualities)
print(rec.query_sequence)
('SRR100025.62130839', 19, '20', 59996, 60048) 52M24S (0, 52, 76) (19, 60228, 295) (True, True, False, 60) array('B', [33, 34, 36, 33, 39, 34, 33, 38, 39, 34, 40, 35, 40, 40, 32, 40, 38, 33, 35, 38, 33, 39, 40, 34, 37, 39, 36, 30, 36, 37, 34, 35, 34, 40, 37, 34, 38, 28, 40, 40, 38, 32, 33, 32, 36, 34, 37, 24, 34, 35, 31, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]) array('B', [33, 34, 36, 33, 39, 34, 33, 38, 39, 34, 40, 35, 40, 40, 32, 40, 38, 33, 35, 38, 33, 39, 40, 34, 37, 39, 36, 30, 36, 37, 34, 35, 34, 40, 37, 34, 38, 28, 40, 40, 38, 32, 33, 32, 36, 34, 37, 24, 34, 35, 31, 2]) CTCAGATCCAGAGGTGGAAGAGGAAGGAAGCTTGGAACCCTATAGAGTTGCTGAGTGCCAGGACCAGATACTGGGC
# For each position in the read, compute the fraction of reads whose aligned
# portion (query_alignment_start .. query_alignment_end) covers that position,
# using the reads fetched from the first 10 Mbp of chromosome 20.
# The 76 slots assume no read is longer than 76 bases.
counts = [0] * 76
n_reads = 0
for rec in bam.fetch('20', 0, 10000000):
    n_reads += 1
    for i in range(rec.query_alignment_start, rec.query_alignment_end):
        counts[i] += 1
# The original divided by a loop variable that is never bound when the region
# yields no reads (NameError). Count reads explicitly and fall back to zeros.
if n_reads:
    freqs = [x / float(n_reads) for x in counts]
else:
    freqs = [0.0] * len(counts)
# Plot against 1-based read positions.
plt.plot(range(1, 77), freqs)
[<matplotlib.lines.Line2D at 0x7fc9c50789d0>]
# Distribution of base qualities per read position over all of chromosome 20.
# phreds[i] collects the quality at read position i from every read whose
# aligned portion covers that position.
phreds = defaultdict(list)
for rec in bam.fetch('20', 0, None):
    quals = rec.query_qualities
    for i in range(rec.query_alignment_start, rec.query_alignment_end):
        phreds[i].append(quals[i])

# Summary statistics per position: maximum plus the 5th/50th/95th percentiles.
maxs = []
tops = []
medians = []
bottoms = []
for i in range(76):
    scores = phreds[i]
    maxs.append(max(scores))
    p5, p50, p95 = np.percentile(scores, [5, 50, 95])
    bottoms.append(p5)
    medians.append(p50)
    tops.append(p95)

# stackplot draws layers on top of each other, so every layer above the first
# is given as the increment over the statistic below it.
medians_fig = [mid - low for mid, low in zip(medians, bottoms)]
tops_fig = [high - mid for high, mid in zip(tops, medians)]
maxs_fig = [peak - high for peak, high in zip(maxs, tops)]

fig, ax = plt.subplots(figsize=(16,9))
read_positions = range(1, 77)  # 1-based positions on the x axis
ax.stackplot(read_positions, (bottoms, medians_fig, tops_fig, maxs_fig))
# Outline the maximum quality with a solid black line.
ax.plot(read_positions, maxs, 'k-')