from Bio import SeqIO
!rm -f PlasmoDB-9.3_Pfalciparum3D7_Genome.fasta 2>/dev/null
!wget http://plasmodb.org/common/downloads/release-9.3/Pfalciparum3D7/fasta/data/PlasmoDB-9.3_Pfalciparum3D7_Genome.fasta
--2014-12-15 14:42:09-- http://plasmodb.org/common/downloads/release-9.3/Pfalciparum3D7/fasta/data/PlasmoDB-9.3_Pfalciparum3D7_Genome.fasta Resolving plasmodb.org (plasmodb.org)... 128.192.75.110 Connecting to plasmodb.org (plasmodb.org)|128.192.75.110|:80... connected. HTTP request sent, awaiting response... 200 OK Length: 23723399 (23M) [application/x-fasta] Saving to: ‘PlasmoDB-9.3_Pfalciparum3D7_Genome.fasta’ 100%[======================================>] 23,723,399 935KB/s in 26s 2014-12-15 14:42:36 (879 KB/s) - ‘PlasmoDB-9.3_Pfalciparum3D7_Genome.fasta’ saved [23723399/23723399]
genome_name = 'PlasmoDB-9.3_Pfalciparum3D7_Genome.fasta'
recs = SeqIO.parse(genome_name, 'fasta')
chroms = {}
for rec in recs:
print(rec.description)
Pf3D7_05_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=1343557 | SO=chromosome Pf3D7_10_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=1687656 | SO=chromosome Pf3D7_07_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=1445207 | SO=chromosome Pf3D7_03_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=1067971 | SO=chromosome Pf3D7_13_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=2925236 | SO=chromosome Pf3D7_11_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=2038340 | SO=chromosome Pf3D7_14_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=3291936 | SO=chromosome Pf3D7_09_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=1541735 | SO=chromosome Pf3D7_01_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=640851 | SO=chromosome Pf3D7_12_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=2271494 | SO=chromosome Pf3D7_08_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=1472805 | SO=chromosome Pf3D7_06_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=1418242 | SO=chromosome Pf3D7_04_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=1200490 | SO=chromosome Pf3D7_02_v3 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=947102 | SO=chromosome M76611 | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=5967 | SO=mitochondrial_chromosome PFC10_API_IRAB | organism=Plasmodium_falciparum_3D7 | version=2012-02-01 | length=34242 | SO=apicoplast_chromosome
from Bio import SeqUtils
chrom_sizes = {}
chrom_GC = {}
recs = SeqIO.parse(genome_name, 'fasta')
block_size = 50000
min_GC = 100.0
max_GC = 0.0
for rec in recs:
if rec.description.find('SO=chromosome') == -1:
continue
chrom = int(rec.description.split('_')[1])
chrom_GC[chrom] = []
size = len(rec.seq)
chrom_sizes[chrom] = size
num_blocks = size // block_size + 1
for block in range(num_blocks):
start = block_size * block
if block == num_blocks - 1:
end = size
else:
end = block_size + start + 1
block_seq = rec.seq[start:end]
block_GC = SeqUtils.GC(block_seq)
if block_GC < min_GC:
min_GC = block_GC
if block_GC > max_GC:
max_GC = block_GC
chrom_GC[chrom].append(block_GC)
print(min_GC, max_GC)
(16.14167716645667, 43.673469387755105)
from __future__ import division
#Lets load notebook's Image
from IPython.core.display import Image
from reportlab.lib import colors
from reportlab.lib.units import cm
from Bio.Graphics import BasicChromosome
chroms = list(chrom_sizes.keys())
chroms.sort()
biggest_chrom = max(chrom_sizes.values())
my_genome = BasicChromosome.Organism(output_format="png")
my_genome.page_size = (29.7*cm, 21*cm) # check
telomere_length = 10
bottom_GC = 17.5
top_GC = 22.0
for chrom in chroms:
chrom_size = chrom_sizes[chrom]
chrom_representation = BasicChromosome.Chromosome('Cr %d' % chrom)
chrom_representation.scale_num = biggest_chrom
tel = BasicChromosome.TelomereSegment()
tel.scale = telomere_length
chrom_representation.add(tel)
num_blocks = len(chrom_GC[chrom])
for block, gc in enumerate(chrom_GC[chrom]):
my_GC = chrom_GC[chrom][block]
body = BasicChromosome.ChromosomeSegment()
if my_GC > top_GC:
body.fill_color = colors.Color(1, 0, 0)
elif my_GC < bottom_GC:
body.fill_color = colors.Color(1, 1, 0)
else:
my_color = (my_GC - bottom_GC) / (top_GC - bottom_GC)
body.fill_color = colors.Color(my_color, my_color, 1)
if block < num_blocks - 1:
body.scale = block_size
else:
body.scale = chrom_size % block_size
chrom_representation.add(body)
tel = BasicChromosome.TelomereSegment(inverted=True)
tel.scale = telomere_length
chrom_representation.add(tel)
my_genome.add(chrom_representation)
my_genome.draw("falciparum.png", "Plasmodium falciparum")
Image("falciparum.png")