import Bio
Bio.__version__  # notebook cell: displays the installed Biopython version

from Bio import SeqIO
from Bio.KEGG.REST import *
from Bio.KEGG.KGML import KGML_parser
from Bio.Graphics.KGML_vis import KGMLCanvas
from Bio.Graphics.ColorSpiral import ColorSpiral

from IPython.display import Image, HTML

import random


# A bit of code that will help us display the PDF output
def PDF(filename):
    """Return an IPython ``HTML`` object that embeds the PDF at `filename`.

    The file is referenced through an ``<iframe>``, so `filename` must be a
    path/URL the notebook front end can resolve.

    NOTE(review): the original markup was lost (the format string was
    empty, which made ``'' % filename`` raise TypeError). The iframe size
    used here is a reasonable default -- adjust if a different size is wanted.
    """
    return HTML('<iframe src="%s" width=700 height=350></iframe>' % filename)


# A bit of helper code to shorten long text
def head(text, lines=10):
    """Print the first `lines` lines of `text`, followed by a '[...]' marker.

    The marker is always appended, even when `text` has fewer than `lines`
    lines.
    """
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('\n'.join(text.split('\n')[:lines] + ['[...]']))


# --- kegg_info: database metadata -----------------------------------------

# Kyoto Encyclopedia of Genes and Genomes
print(kegg_info("kegg").read())

# KEGG Pathway Database
print(kegg_info("pathway").read())

# Escherichia coli K-12 MG1655 KEGG Genes Database
print(kegg_info("eco").read())

# --- kegg_list: enumerate database entries --------------------------------

# List all pathways in the pathway database
head(kegg_list('pathway').read())

# Only list pathways present in E. coli K-12 MG1655
head(kegg_list('pathway', 'eco').read())

# E. coli K-12 MG1655 genes
head(kegg_list('eco').read())

# Compound entry C01290 and glycan entry G00092 (single '+'-joined query)
print(kegg_list('C01290+G00092').read())

# Compound entry C01290 and glycan entry G00092 (same query, as a list)
print(kegg_list(['C01290', 'G00092']).read())

# --- kegg_find: search by keyword or property ------------------------------

# Find shiga toxin genes
head(kegg_find('genes', 'shiga+toxin').read())

# Find shiga toxin genes only in Escherichia coli O111 H-11128 (EHEC)
print(kegg_find('eoi', 'shiga+toxin').read())

# Compounds with molecular weight between 300 and 310 g/mol
head(kegg_find('compound', '300-310/mol_weight').read())

# --- kegg_get: retrieve individual entries ---------------------------------

# Compound as database entry
head(kegg_get("cpd:C01290").read())

# Compound as image
Image(kegg_get("cpd:C01290", "image").read())

# Gene as database entry
head(kegg_get("ece:Z5100").read())

# Gene as amino acid sequence
print(kegg_get("ece:Z5100", "aaseq").read())

# Gene as nucleotide sequence
print(kegg_get("ece:Z5100", "ntseq").read())

# Parsing a returned sequence with SeqIO: the handle returned by kegg_get
# is passed straight to the FASTA parser.
seq = SeqIO.read(kegg_get("ece:Z5100", "ntseq"), 'fasta')
print(seq.format('stockholm'))

# Pathway as database entry
head(kegg_get("hsa05130").read())

# Pathway as image (png)
Image(kegg_get("hsa05130", "image").read())

# Pathway as KGML
head(kegg_get("hsa05130", "kgml").read())

# Render central metabolism
Image(kegg_get("map01100", "image").read())

# Render fatty-acid biosynthesis
Image(kegg_get("map00061", "image").read())

# Render E.coli K-12 MG1655 central metabolism
Image(kegg_get("eco01100", "image").read())

# Render E.coli K-12 MG1655 fatty-acid biosynthesis
Image(kegg_get("eco00061", "image").read())

# Render reference fatty-acid biosynthesis
Image(kegg_get("ko00061", "image").read())
#Image(kegg_get("ec00061", "image").read())
#Image(kegg_get("rn00061", "image").read())

# Get KGML for fatty-acid biosynthesis
ko_map = (kegg_get("ko00061", "kgml").read())    # KO version (KEGG orthologues)
eco_map = (kegg_get("eco00061", "kgml").read())  # E. coli version

# View the contents of ko_map KGML
head(ko_map)

# Trying to retrieve base map KGML generates an error
#base_map = (kegg_get("map00061", "kgml").read())

# Parse the KGML into a pathway object and render it, first without and
# then with the KEGG background image.
pathway = KGML_parser.read(kegg_get("ko00061", "kgml"))
print(pathway)

canvas = KGMLCanvas(pathway)
canvas.draw("fab_map.pdf")
PDF("fab_map.pdf")

canvas.import_imagemap = True
canvas.draw("fab_map_with_image.pdf")
PDF("fab_map_with_image.pdf")


def draw_kegg_map(map_id):
    """Render a local PDF of the KEGG map with the passed map ID.

    The KGML for `map_id` is fetched with ``kegg_get``, parsed, and drawn on
    a ``KGMLCanvas`` created with ``import_imagemap=True``. The output is
    written to ``"<map_id>.pdf"`` (a relative path). Returns None.
    """
    pathway = KGML_parser.read(kegg_get(map_id, "kgml"))
    canvas = KGMLCanvas(pathway, import_imagemap=True)
    img_filename = "%s.pdf" % map_id
    canvas.draw(img_filename)


pathway = KGML_parser.read(kegg_get("ko00061", "kgml"))
canvas = KGMLCanvas(pathway, import_imagemap=True)
canvas.draw("fab_map_with_image.pdf")
PDF("fab_map_with_image.pdf")

# Inspect the ortholog entries and the graphics attributes of the first one
pathway.orthologs[:5]

print(pathway.orthologs[0])

pathway.orthologs[0].graphics

element = pathway.orthologs[0].graphics[0]
attrs = [element.name, element.x, element.y, element.coords, element.type,
         element.width, element.height, element.fgcolor, element.bgcolor,
         element.bounds, element.centre]
print('\n'.join(str(attr) for attr in attrs))


# Helper function to convert colour as RGB tuple to hex string
def rgb_to_hex(rgb):
    """Convert an iterable of colour components to an upper-case hex string.

    Each component is multiplied by 255 and truncated to an int, then
    written as exactly two hex digits, e.g. ``(1, 0, 0.5)`` -> ``'#FF007F'``.
    Zero-padding matters: without it any channel below 16 collapses to one
    digit and the result is not a valid ``#RRGGBB`` colour.

    Assumes components are floats in the range 0-1 -- TODO confirm against
    the colour source.
    """
    return '#' + ''.join('%02X' % int(255 * val) for val in rgb)


# Define arbitrary colours
colorspiral = ColorSpiral()
colorlist = colorspiral.get_colors(len(pathway.orthologs))

# Change the colours of ortholog elements
for color, element in zip(colorlist, pathway.orthologs):
    for graphic in element.graphics:
        graphic.bgcolor = rgb_to_hex(color)

canvas = KGMLCanvas(pathway, import_imagemap=True)
canvas.draw("fab_map_new_colours.pdf")
PDF("fab_map_new_colours.pdf")

# Change the sizes of compound elements
for size, element in zip(range(8, 8 + len(pathway.compounds)),
                         pathway.compounds):
    for graphic in element.graphics:
        graphic.width = size
        graphic.height = size

canvas = KGMLCanvas(pathway, import_imagemap=True)
canvas.draw("fab_map_new_sizes.pdf")
PDF("fab_map_new_sizes.pdf")

# The three KEGG maps with lines representing reactions.
maps = ['ko01100', 'ko01110', 'ko01120']
# Plain loop: the calls are made for their side effect (writing PDFs), and
# the loop variable must not shadow the builtin `map`.
for map_id in maps:
    draw_kegg_map(map_id)

print(kegg_get(maps).read())

PDF(maps[0] + '.pdf')

PDF(maps[1] + '.pdf')

PDF(maps[2] + '.pdf')

# Use the bacterial diverse environments map
pathway = KGML_parser.read(kegg_get("ko01120", "kgml"))

# Change the widths of reaction entries elements
for element in pathway.orthologs:
    for graphic in element.graphics:
        graphic.width = random.randrange(1, 10, 1)

canvas = KGMLCanvas(pathway, import_imagemap=False)
canvas.draw("bacteria_mod_widths.pdf")
PDF("bacteria_mod_widths.pdf")

# Define arbitrary colours
colorspiral = ColorSpiral()
colorlist = colorspiral.get_colors(len(pathway.orthologs))

# Change the colours of ortholog elements
for color, element in zip(colorlist, pathway.orthologs):
    for graphic in element.graphics:
        graphic.fgcolor = rgb_to_hex(color)

canvas = KGMLCanvas(pathway, import_imagemap=False)
canvas.draw("bacteria_mod_colour.pdf")
PDF("bacteria_mod_colour.pdf")