from Bio import Entrez, Medline, SeqIO
# Placeholder contact address set on the Entrez module; replace it with your
# own e-mail address before running the queries below.
Entrez.email = "put@your_email.here"
# This gives you the list of available databases.
handle = Entrez.einfo()
try:
    rec = Entrez.read(handle)
finally:
    # Release the network handle even if parsing the reply fails.
    handle.close()
print(rec)
{u'DbList': ['pubmed', 'protein', 'nuccore', 'nucleotide', 'nucgss', 'nucest', 'structure', 'genome', 'assembly', 'genomeprj', 'bioproject', 'biosample', 'blastdbinfo', 'books', 'cdd', 'clinvar', 'clone', 'gap', 'gapplus', 'grasp', 'dbvar', 'epigenomics', 'gene', 'gds', 'geoprofiles', 'homologene', 'medgen', 'journals', 'mesh', 'ncbisearch', 'nlmcatalog', 'omim', 'orgtrack', 'pmc', 'popset', 'probe', 'proteinclusters', 'pcassay', 'biosystems', 'pccompound', 'pcsubstance', 'pubmedhealth', 'seqannot', 'snp', 'sra', 'taxonomy', 'toolkit', 'toolkitall', 'toolkitbook', 'unigene', 'gencoll', 'gtr']}
# Search the nucleotide database for CRT gene entries of P. falciparum and
# collect the ids of *all* matching records in id_list.
search_term = 'CRT[Gene Name] AND "Plasmodium falciparum"[Organism]'
handle = Entrez.esearch(db="nucleotide", term=search_term)
try:
    rec_list = Entrez.read(handle)
finally:
    handle.close()

# Entrez.read returns the counters as strings, so convert them before
# comparing: a plain string comparison is lexicographic ('20' < '100' is
# False) and could skip the full query.
if int(rec_list['RetMax']) < int(rec_list['Count']):
    # The first reply was truncated; repeat the search asking for every hit.
    handle = Entrez.esearch(db="nucleotide", term=search_term,
                            retmax=rec_list['Count'])
    try:
        rec_list = Entrez.read(handle)
    finally:
        handle.close()

id_list = rec_list['IdList']
# Download the GenBank entries for every id in id_list and parse them into a
# list of records.
hdl = Entrez.efetch(db='nucleotide', id=id_list, rettype='gb')
try:
    # list() drains the parser while the handle is still open.
    recs = list(SeqIO.parse(hdl, 'gb'))
finally:
    hdl.close()
# Pick the record named KM288867 out of the fetched records; `rec` stays bound
# to it for the rest of the script.
target_name = 'KM288867'
for rec in recs:
    if rec.name == target_name:
        break
else:
    # Without this, `rec` would silently be the last record fetched (or stale
    # if nothing was fetched) and everything below would describe the wrong
    # entry.
    raise LookupError('record %s not found among %d fetched records'
                      % (target_name, len(recs)))
print(rec.name)
print(rec.description)
KM288867 Plasmodium falciparum clone PF3D7_0709000 chloroquine resistance transporter (CRT) gene, complete cds.
# Report on each feature of the selected record: exons are printed with their
# start, end and strand, gene features with their 'gene' qualifier, and any
# other feature type is dumped in full under a "not processed" header.
for feat in rec.features:
    kind = feat.type
    if kind == 'gene':
        print(feat.qualifiers['gene'])
        continue
    if kind == 'exon':
        span = feat.location
        print('Exon', span.start, span.end, span.strand)
        continue
    print('not processed:\n%s' % feat)
not processed: type: source location: [0:10000](+) qualifiers: Key: clone, Value: ['PF3D7_0709000'] Key: db_xref, Value: ['taxon:5833'] Key: mol_type, Value: ['genomic DNA'] Key: organism, Value: ['Plasmodium falciparum'] ['CRT'] not processed: type: mRNA location: join{[2751:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:7598](+)} qualifiers: Key: gene, Value: ['CRT'] Key: product, Value: ['chloroquine resistance transporter'] Sub-Features type: mRNA location: [2751:3543](+) qualifiers: type: mRNA location: [3720:3989](+) qualifiers: type: mRNA location: [4168:4341](+) qualifiers: type: mRNA location: [4513:4646](+) qualifiers: type: mRNA location: [4799:4871](+) qualifiers: type: mRNA location: [4994:5070](+) qualifiers: type: mRNA location: [5166:5249](+) qualifiers: type: mRNA location: [5376:5427](+) qualifiers: type: mRNA location: [5564:5621](+) qualifiers: type: mRNA location: [5769:5862](+) qualifiers: type: mRNA location: [6055:6100](+) qualifiers: type: mRNA location: [6247:6302](+) qualifiers: type: mRNA location: [6471:7598](+) qualifiers: not processed: type: 5'UTR location: [2751:3452](+) qualifiers: Key: gene, Value: ['CRT'] not processed: type: primer_bind location: [2935:2958](+) qualifiers: not processed: type: primer_bind location: [3094:3121](+) qualifiers: not processed: type: CDS location: join{[3452:3543](+), [3720:3989](+), [4168:4341](+), [4513:4646](+), [4799:4871](+), [4994:5070](+), [5166:5249](+), [5376:5427](+), [5564:5621](+), [5769:5862](+), [6055:6100](+), [6247:6302](+), [6471:6548](+)} qualifiers: Key: codon_start, Value: ['1'] Key: db_xref, Value: ['GI:706072609'] Key: gene, Value: ['CRT'] Key: product, Value: ['chloroquine resistance transporter'] Key: protein_id, Value: ['AIW62921.1'] Key: translation, Value: 
['MKFASKKNNQKNSSKNDERYRELDNLVQEGNGSRLGGGSCLGKCAHVFKLIFKEIKDNIFIYILSIIYLSVCVMNKIFAKRTLNKIGNYSFVTSETHNFICMIMFFIVYSLFGNKKGNSKERHRSFNLQFFAISMLDACSVILAFIGLTRTTGNIQSFVLQLSIPINMFFCFLILRYRYHLYNYLGAVIIVVTIALVEMKLSFETQEENSIIFNLVLISALIPVCFSNMTREIVFKKYKIDILRLNAMVSFFQLFTSCLILPVYTLPFLKQLHLPYNEIWTNIKNGFACLFLGRNTVVENCGLGMAKLCDDCDGAWKTFALFSFFNICDNLITSYIIDKFSTMTYTIVSCIQGPAIAIAYYFKFLAGDVVREPRLLDFVTLFGYLFGSIIYRVGNIILERKKMRNEENEDSEGELTNVDSIITQ'] Sub-Features type: CDS location: [3452:3543](+) qualifiers: type: CDS location: [3720:3989](+) qualifiers: type: CDS location: [4168:4341](+) qualifiers: type: CDS location: [4513:4646](+) qualifiers: type: CDS location: [4799:4871](+) qualifiers: type: CDS location: [4994:5070](+) qualifiers: type: CDS location: [5166:5249](+) qualifiers: type: CDS location: [5376:5427](+) qualifiers: type: CDS location: [5564:5621](+) qualifiers: type: CDS location: [5769:5862](+) qualifiers: type: CDS location: [6055:6100](+) qualifiers: type: CDS location: [6247:6302](+) qualifiers: type: CDS location: [6471:6548](+) qualifiers: ('Exon', ExactPosition(3452), ExactPosition(3543), 1) ('Exon', ExactPosition(3720), ExactPosition(3989), 1) ('Exon', ExactPosition(4168), ExactPosition(4341), 1) not processed: type: primer_bind location: [4288:4323](-) qualifiers: ('Exon', ExactPosition(4513), ExactPosition(4646), 1) ('Exon', ExactPosition(4799), ExactPosition(4871), 1) ('Exon', ExactPosition(4994), ExactPosition(5070), 1) ('Exon', ExactPosition(5166), ExactPosition(5249), 1) ('Exon', ExactPosition(5376), ExactPosition(5427), 1) ('Exon', ExactPosition(5564), ExactPosition(5621), 1) ('Exon', ExactPosition(5769), ExactPosition(5862), 1) ('Exon', ExactPosition(6055), ExactPosition(6100), 1) ('Exon', ExactPosition(6247), ExactPosition(6302), 1) ('Exon', ExactPosition(6471), ExactPosition(6548), 1) not processed: type: 3'UTR location: [6548:7598](+) qualifiers: Key: gene, Value: ['CRT'] not processed: type: primer_bind location: [7833:7856](-) qualifiers:
# Print every annotation of the selected record as "name=value", one per line.
for entry in rec.annotations.items():
    # entry is a (name, value) pair, which fills both %s slots.
    print('%s=%s' % entry)
sequence_version=1 source=Plasmodium falciparum (malaria parasite P. falciparum) taxonomy=['Eukaryota', 'Alveolata', 'Apicomplexa', 'Aconoidasida', 'Haemosporida', 'Plasmodium', 'Plasmodium (Laverania)'] keywords=[''] references=[Reference(title='Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction', ...), Reference(title='Direct Submission', ...)] accessions=['KM288867'] data_file_division=INV date=12-NOV-2014 organism=Plasmodium falciparum gi=706072608
# Print the length of the selected record's sequence.
seq_length = len(rec.seq)
print(seq_length)
10000
# For every literature reference of the selected record that carries a PubMed
# id, print the id, fetch its MEDLINE entry from PubMed and print each field
# as "key: value".
# A record without a 'references' annotation is treated as having none.
refs = rec.annotations.get('references', [])
for ref in refs:
    if not ref.pubmed_id:
        # Nothing to look up for references without a PubMed id.
        continue
    print(ref.pubmed_id)
    handle = Entrez.efetch(db="pubmed", id=[ref.pubmed_id],
                           rettype="medline", retmode="text")
    try:
        # The parsed records are consumed while the handle is still open;
        # it is closed only after the loop finishes.
        records = Medline.parse(handle)
        for med_rec in records:
            for k, v in med_rec.items():
                print('%s: %s' % (k, v))
    finally:
        # One handle is opened per reference; close each before the next.
        handle.close()
25370483 LID: 10.1038/ncomms6329 [doi] STAT: In-Process DEP: 20141105 MID: ['NIHMS630149'] DA: 20141105 AID: ['ncomms6329 [pii]', '10.1038/ncomms6329 [doi]'] CRDT: ['2014/11/06 06:00'] DP: 2014 GR: ['1DP2OD007124/OD/NIH HHS/United States', '5-T32-ES007020/ES/NIEHS NIH HHS/United States', '5-T32-GM08334/GM/NIGMS NIH HHS/United States', 'DP2 OD007124/OD/NIH HHS/United States', 'P30 ES002109/ES/NIEHS NIH HHS/United States'] OWN: NLM PT: ['Journal Article', 'Research Support, N.I.H., Extramural', "Research Support, Non-U.S. Gov't"] LA: ['eng'] FAU: ['Goldfless, Stephen J', 'Wagner, Jeffrey C', 'Niles, Jacquin C'] JT: Nature communications LR: 20150117 PG: 5329 TI: Versatile control of Plasmodium falciparum gene expression with an inducible protein-RNA interaction. PMCR: ['2015/05/05 00:00'] PL: England TA: Nat Commun JID: 101528555 AB: The available tools for conditional gene expression in Plasmodium falciparum are limited. Here, to enable reliable control of target gene expression, we build a system to efficiently modulate translation. We overcame several problems associated with other approaches for regulating gene expression in P. falciparum. Specifically, our system functions predictably across several native and engineered promoter contexts, and affords control over reporter and native parasite proteins irrespective of their subcellular compartmentalization. Induction and repression of gene expression are rapid, homogeneous and stable over prolonged periods. To demonstrate practical application of our system, we used it to reveal direct links between antimalarial drugs and their native parasite molecular target. This is an important outcome given the rapid spread of resistance, and intensified efforts to efficiently discover and optimize new antimalarial drugs. Overall, the studies presented highlight the utility of our system for broadly controlling gene expression and performing functional genetics in P. falciparum. 
AD: Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA. Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA. Department of Biological Engineering, Massachusetts Institute of Technology, 77 Massachusetts Avenue, Cambridge, Massachusetts 02139, USA. VI: 5 IS: 2041-1723 (Electronic) 2041-1723 (Linking) PMC: PMC4223869 AU: ['Goldfless SJ', 'Wagner JC', 'Niles JC'] MHDA: 2014/11/06 06:00 PHST: ['2014/04/15 [received]', '2014/09/20 [accepted]'] OID: ['NLM: NIHMS630149 [Available on 05/05/15]', 'NLM: PMC4223869 [Available on 05/05/15]'] EDAT: 2014/11/06 06:00 SI: ['GENBANK/KM288848', 'GENBANK/KM288849', 'GENBANK/KM288850', 'GENBANK/KM288851', 'GENBANK/KM288852', 'GENBANK/KM288853', 'GENBANK/KM288854', 'GENBANK/KM288855', 'GENBANK/KM288856', 'GENBANK/KM288857', 'GENBANK/KM288858', 'GENBANK/KM288859', 'GENBANK/KM288860', 'GENBANK/KM288861', 'GENBANK/KM288862', 'GENBANK/KM288863', 'GENBANK/KM288864', 'GENBANK/KM288865', 'GENBANK/KM288866', 'GENBANK/KM288867'] SO: Nat Commun. 2014 Nov 5;5:5329. doi: 10.1038/ncomms6329. SB: IM PMID: 25370483 PST: epublish