#!/usr/bin/env python
# coding: utf-8

# # Pathway pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1
#
# This notebook describes the assembly of 4 single gene expression cassettes into a single pathway.
# Notebooks describing the single gene expression vectors are linked at the end of this document, as are
# notebooks describing pYPKa promoter, gene and terminator vectors. Specific primers needed are also listed below.
#
# ![pathway with N genes](pw.png "pathway with N genes")
#
# The [pydna](https://pypi.python.org/pypi/pydna/) package is imported in the code cell below.
# There is a [publication](http://www.biomedcentral.com/1471-2105/16/142) describing pydna as well as
# [documentation](http://pydna.readthedocs.org/en/latest/) available online.
# Pydna is developed on [Github](https://github.com/BjornFJohansson/pydna).
#
# The assembly performed here is based on content of the [INDATA_pth6.txt](INDATA_pth6.txt) text file.
# The assembly log can be viewed [here](log.txt).

# In[1]:


from pydna.parsers import parse_primers
from pydna.readers import read
from pydna.amplify import pcr
from pydna.assembly import Assembly


# Initiate the standard primers needed to amplify each cassette.
# The first cassette in the pathway is amplified with standard
# primers 577 and 778, the last with
# 775 and 578 and all others with 775 and 778.
# Standard primers are listed [here](standard_primers.txt).

# In[2]:


# Map each primer's id to the primer itself so primers can be looked up as p['577'].
p = {x.id: x for x in parse_primers("standard_primers.txt")}


# The backbone vector is linearized with [EcoRV](http://rebase.neb.com/rebase/enz/EcoRV.html).

# In[3]:


from Bio.Restriction import EcoRV, NotI, PacI

pYPKpw = read("pYPKpw.gb")


# The cassette_products variable holds the list of expression cassette PCR products to
# be assembled.

# In[4]:


cassette_products = []


# The expression cassettes come from a series of single gene expression vectors
# held in the template_vectors list.

# In[5]:


# One GenBank file name per line; the order here is the order of the cassettes in the pathway.
cassette_vectors = '''
pYPK0_TEF1_SsXYL1_TDH3.gb
pYPK0_TDH3_SsXYL2_PGI.gb
pYPK0_PGI_ScXKS1_FBA1.gb
pYPK0_FBA1_ScTAL1_PDC1.gb'''.splitlines()

# Blank entries (such as the empty first line of the literal above) are skipped.
template_vectors = [read(v.strip()) for v in cassette_vectors if v.strip()]

# Bare expression: shown as the cell output when run as a notebook.
template_vectors


# The first cassette in the pathway is amplified with standard primers 577 and 778.
# Suggested PCR conditions can be found at the end of this document.

# In[6]:


cassette_products.append(pcr(p['577'], p['778'], template_vectors[0]))


# Cassettes in the middle of the pathway are amplified with standard primers 775 and 778.
# Suggested PCR conditions can be found at the end of this document.

# In[7]:


# template_vectors[1:-1] is every vector except the first and the last one.
cassette_products.extend(pcr(p['775'], p['778'], v) for v in template_vectors[1:-1])


# The last cassette in the pathway is amplified with standard primers 775 and 578.
# Suggested PCR conditions can be found at the end of this document.

# In[8]:


cassette_products.append(pcr(p['775'], p['578'], template_vectors[-1]))


# The cassettes are given names based on their order in the final construct in the code cell below.

# In[9]:


for i, cp in enumerate(cassette_products):
    cp.name = "Cassette {}".format(i+1)
    print(cp.name)


# Cassettes and plasmid backbone are joined by homologous recombination in a Saccharomyces cerevisiae ura3 host
# which selects for the URA3 gene in pYPKpw.

# In[10]:


asm = Assembly([pYPKpw.linearize(EcoRV)] + cassette_products, limit=167-47-10)

# Bare expression: shown as the cell output when run as a notebook.
asm


# Normally, only one circular product should be formed since the
# homology limit is quite large (see cell above). More than one
# circular product might indicate an incorrect strategy.
# The largest recombination product is chosen as candidate for
# the pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1 pathway.

# In[11]:


# The first circular product is taken as the candidate.
candidate = asm.assemble_circular()[0]


# This assembly figure shows how the fragments came together.

# In[12]:


candidate.figure()


# The final pathway is synchronized to the backbone vector. This means that
# the plasmid origin is shifted so that it matches the original.

# In[13]:


pw = candidate.synced(pYPKpw)


# The cseguid checksum for the resulting plasmid is calculated for future reference.
# The [cseguid checksum](http://pydna.readthedocs.org/en/latest/pydna.html#pydna.utils.cseguid)
# uniquely identifies a circular double stranded sequence.

# In[14]:


pw.cseguid()


# The file is given a name based on the sequence of expressed genes.

# In[15]:


pw.locus = "pw"
pw.definition = "pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1"


# Stamp sequence with cseguid checksum. This can be used to verify the
# integrity of the sequence file.

# In[16]:


pw.stamp()


# Write sequence to a local file.

# In[17]:


pw.write("pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1.gb")


# The pathway can be extended by digestion with either NotI or PacI or both,
# provided that the enzymes cut once in the final pathway sequence.

# In[18]:


print("NotI cuts {} time(s) and PacI cuts {} time(s) in the final pathway.".format(len(pw.cut(NotI)), len(pw.cut(PacI))))


# ## DOWNLOAD [pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1](pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1.gb)

# In[19]:


import pydna

# Read the file written above back from disk and check its stamp.
reloaded = read("pYPK0_SsXYL1_SsXYL2_ScXKS1_ScTAL1.gb")

reloaded.verify_stamp()


# ### New Primers needed for assembly.
#
# This list contains all needed primers that are not in the standard primer [list](standard_primers.txt) above.

# In[20]:


# A missing or unreadable new_primers.txt is reported as "no new primers needed."
try:
    with open("new_primers.txt") as f:
        text = f.read()
except IOError:
    text = "no new primers needed."
print(text)


# ### New single gene expression vectors (pYPK0_prom_gene_term) needed for assembly.
#
# Hyperlinks to notebook files describing the single gene expression plasmids needed for the assembly.
#
# [pYPK0_TEF1_SsXYL1_TDH3](pYPK0_TEF1_SsXYL1_TDH3.ipynb)
# [pYPK0_TDH3_SsXYL2_PGI](pYPK0_TDH3_SsXYL2_PGI.ipynb)
# [pYPK0_PGI_ScXKS1_FBA1](pYPK0_PGI_ScXKS1_FBA1.ipynb)
# [pYPK0_FBA1_ScTAL1_PDC1](pYPK0_FBA1_ScTAL1_PDC1.ipynb)
#
#
# ### New pYPKa vectors needed for assembly of the single gene expression vectors above.
#
# Hyperlinks to notebook files describing the pYPKa plasmids needed for the assembly of the single gene clones listed above.
#
# [pYPKa_ZE_TEF1](pYPKa_ZE_TEF1.ipynb)
# [pYPKa_ZE_TDH3](pYPKa_ZE_TDH3.ipynb)
# [pYPKa_ZE_PGI](pYPKa_ZE_PGI.ipynb)
# [pYPKa_ZE_FBA1](pYPKa_ZE_FBA1.ipynb)
# [pYPKa_ZE_PDC1](pYPKa_ZE_PDC1.ipynb)
#
#
# ### Suggested PCR conditions

# In[21]:


for prd in cassette_products:
    print("\n\n\n\n")
    print("product name:", prd.name)
    print("forward primer", prd.forward_primer.name)
    print("reverse primer", prd.reverse_primer.name)
    print(prd.program())