from Bio import Entrez, Seq, SeqIO
from Bio.Alphabet import IUPAC
# Contact address sent along with Entrez requests; replace the placeholder
# with a real e-mail address before running.
Entrez.email = "put@your_email.here"
# Fetch the lactase (LCT) mRNA record NM_002299 as plain-text FASTA.
hdl = Entrez.efetch(db='nucleotide', id=['NM_002299'], rettype='fasta',
                    retmode='text')
try:
    # The handle can only be consumed once: to inspect the raw FASTA text,
    # iterate over `hdl` and print each line instead of parsing it here.
    seq = SeqIO.read(hdl, 'fasta')
finally:
    # Release the network connection even if parsing fails.
    hdl.close()
# Keep only positions 11..5794 of the record (0-based, end-exclusive).
# NOTE(review): the slice starts with ATG and ends with TGA, so it is
# presumably the coding sequence of the transcript — confirm the coordinates.
w_seq = seq[11:5795]
w_seq
SeqRecord(seq=Seq('ATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATGCTGG...TGA', SingleLetterAlphabet()), id='gi|32481205|ref|NM_002299.2|', name='gi|32481205|ref|NM_002299.2|', description='gi|32481205|ref|NM_002299.2| Homo sapiens lactase (LCT), mRNA', dbxrefs=[])
# Save the sliced record to disk in FASTA format. The `with` block closes
# the file even if SeqIO.write raises, so no handle is leaked.
with open('example.fasta', 'w') as w_hdl:
    SeqIO.write([w_seq], w_hdl, 'fasta')
# Read the FASTA file back and, for every record in it, print the record's
# type, its description line, its first ten residues and its alphabet.
recs = SeqIO.parse('example.fasta', 'fasta')
for rec in recs:
    print(type(rec))
    # `seq` stays bound to the last record's sequence after the loop ends;
    # the statements that follow the loop rely on it.
    seq = rec.seq
    print(rec.description)
    print(seq[:10])
    print(seq.alphabet)
<class 'Bio.SeqRecord.SeqRecord'>
gi|32481205|ref|NM_002299.2| Homo sapiens lactase (LCT), mRNA
ATGGAGCTGT
SingleLetterAlphabet()
# Rebuild the sequence from its plain string form, this time tagging it
# with the unambiguous IUPAC DNA alphabet.
seq = Seq.Seq(str(seq), alphabet=IUPAC.unambiguous_dna)
seq
Seq('ATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATGCTGG...TGA', IUPACUnambiguousDNA())
# Transcribe the DNA sequence into RNA.
rna = seq.transcribe()
# Show the first and last twelve bases of the DNA sequence as a tuple.
ends = (seq[:12], seq[-12:])
print(ends)
rna
(Seq('ATGGAGCTGTCT', IUPACUnambiguousDNA()), Seq('TCTTCATTCTGA', IUPACUnambiguousDNA()))
Seq('AUGGAGCUGUCUUGGCAUGUAGUCUUUAUUGCCCUGCUAAGUUUUUCAUGCUGG...UGA', IUPACUnambiguousRNA())
# Translate the DNA sequence into protein with the standard codon table.
# to_stop=False keeps translating through stop codons, which appear as '*'
# in the result.
prot = seq.translate(table='Standard', stop_symbol='*', to_stop=False)
prot
Seq('MELSWHVVFIALLSFSCWGSDWESDRNFISTAGPLTNDLLHNLSGLLGDQSSNF...SF*', HasStopCodon(IUPACProtein(), '*'))