from __future__ import print_function
from Bio import PDB
repository = PDB.PDBList()
# Download each PDB entry used in this file into the current directory.
for pdb_code in ('1TUP', '1OLG', '1YCQ'):
    repository.retrieve_pdb_file(pdb_code, pdir='.')
Downloading PDB structure '1TUP'... Downloading PDB structure '1OLG'... Downloading PDB structure '1YCQ'...
'./pdb1ycq.ent'
parser = PDB.PDBParser()
# Parse the three downloaded files; each pair is (structure label, file name).
p53_1tup, p53_1olg, p53_1ycq = [
    parser.get_structure(label, file_name)
    for label, file_name in (
        ('P 53 - DNA Binding', 'pdb1tup.ent'),
        ('P 53 - Tetramerization', 'pdb1olg.ent'),
        ('P 53 - Transactivation', 'pdb1ycq.ent'),
    )
]
/home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6146. PDBConstructionWarning) /home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6147. PDBConstructionWarning) /home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6148. PDBConstructionWarning) /home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain E is discontinuous at line 6149. PDBConstructionWarning) /home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain F is discontinuous at line 6171. PDBConstructionWarning) /home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 6185. PDBConstructionWarning) /home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 6383. PDBConstructionWarning) /home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain C is discontinuous at line 6453. PDBConstructionWarning) /home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain A is discontinuous at line 1125. PDBConstructionWarning) /home/tra/Dropbox/soft/biopython/Bio/PDB/StructureBuilder.py:87: PDBConstructionWarning: WARNING: Chain B is discontinuous at line 1160. PDBConstructionWarning)
def print_pdb_headers(headers, indent=0):
    """Pretty-print a (possibly nested) header mapping to standard output.

    Every key is right-aligned in a 20-character column, preceded by
    ``indent`` spaces. Values are rendered according to their type:

    * dictionaries (including subclasses such as ``OrderedDict``) are printed
      recursively with four additional spaces of indentation, surrounded by
      blank lines;
    * lists (including subclasses) are printed one element per line, each
      behind a ``->`` marker;
    * anything else is printed on the same line as its key.

    Args:
        headers: Mapping from header name to value; only ``items()`` is used.
        indent: Number of spaces to prepend to every printed line.
    """
    ind_text = ' ' * indent
    for header, content in headers.items():
        # isinstance (rather than an exact type comparison) so that dict and
        # list subclasses are expanded instead of being dumped as a raw repr.
        if isinstance(content, dict):
            print('\n%s%20s:' % (ind_text, header))
            print_pdb_headers(content, indent + 4)
            print()
        elif isinstance(content, list):
            print('%s%20s:' % (ind_text, header))
            for elem in content:
                print('%s%21s %s' % (ind_text, '->', elem))
        else:
            print('%s%20s: %s' % (ind_text, header, content))
# Dump the full parsed header dictionary of the 1TUP structure.
print_pdb_headers(p53_1tup.header)
structure_method: x-ray diffraction head: antitumor protein/dna journal: AUTH Y.CHO,S.GORINA,P.D.JEFFREY,N.P.PAVLETICHTITL CRYSTAL STRUCTURE OF A P53 TUMOR SUPPRESSOR-DNATITL 2 COMPLEX: UNDERSTANDING TUMORIGENIC MUTATIONS.REF SCIENCE V. 265 346 1994REFN ISSN 0036-8075PMID 8023157 journal_reference: y.cho,s.gorina,p.d.jeffrey,n.p.pavletich crystal structure of a p53 tumor suppressor-dna complex: understanding tumorigenic mutations. science v. 265 346 1994 issn 0036-8075 8023157 compound: 1: molecule: dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') misc: engineered: yes chain: e 3: molecule: protein (p53 tumor suppressor ) misc: engineered: yes chain: a, b, c 2: molecule: dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') misc: engineered: yes chain: f keywords: antigen p53, antitumor protein/dna complex name: tumor suppressor p53 complexed with dna author: Y.Cho,S.Gorina,P.D.Jeffrey,N.P.Pavletich deposition_date: 1995-07-11 release_date: 1995-07-11 source: 1: synthetic: yes misc: 3: expression_system: escherichia coli expression_system_taxid: 562 organism_scientific: homo sapiens misc: cell: human vulva carcinoma expression_system_plasmid: pet3d cell_line: a431 organism_taxid: 9606 organism_common: human 2: synthetic: yes misc: resolution: 2.2 structure_reference: -> n.p.pavletich,k.a.chambers,c.o.pabo the dna-binding domain of p53 contains the four conserved regions and the major mutation hot spots genes dev. v. 7 2556 1993 issn 0890-9369 -> b.vogelstein,k.w.kinzler p53 function and dysfunction cell(cambridge,mass.) v. 70 523 1992 issn 0092-8674
# Show the 'compound' header entry of each parsed structure in turn.
for structure in (p53_1tup, p53_1olg, p53_1ycq):
    print(structure.header['compound'])
{'1': {'molecule': "dna (5'-d(*tp*tp*tp*cp*cp*tp*ap*gp*ap*cp*tp*tp*gp*cp*cp*cp*a p*ap*tp*tp*a)-3') ", 'misc': '', 'engineered': 'yes', 'chain': 'e'}, '3': {'molecule': 'protein (p53 tumor suppressor )', 'misc': '', 'engineered': 'yes', 'chain': 'a, b, c'}, '2': {'molecule': "dna (5'-d(*ap*tp*ap*ap*tp*tp*gp*gp*gp*cp*ap*ap*gp*tp*cp*tp*a p*gp*gp*ap*a)-3') ", 'misc': '', 'engineered': 'yes', 'chain': 'f'}} {'1': {'molecule': 'tumor suppressor p53 (oligomerization domain)', 'misc': '', 'engineered': 'yes', 'chain': 'a, b, c, d'}} {'1': {'molecule': 'mdm2', 'engineered': 'yes', 'misc': '', 'synonym': 'mdm2', 'chain': 'a'}, '2': {'fragment': 'residues 13 - 29', 'molecule': 'p53', 'misc': '', 'engineered': 'yes', 'chain': 'b'}}
def describe_model(name, pdb):
    """Print a one-line residue/atom summary for every chain in ``pdb``.

    A blank line is printed first. Then, for each chain of each model
    obtained by iterating ``pdb``, one line reports ``name``, the chain id,
    ``len(chain)`` as the residue count and the number of items yielded by
    ``chain.get_atoms()`` as the atom count.

    Args:
        name: Label placed at the start of every summary line.
        pdb: Iterable of models, each of which is an iterable of chains.
    """
    summary = '%s - Chain: %s. Number of residues: %d. Number of atoms: %d.'
    print()
    # Flatten the model -> chain hierarchy lazily, preserving iteration order.
    every_chain = (chain for model in pdb for chain in model)
    for chain in every_chain:
        chain_id = chain.id
        residue_count = len(chain)
        atom_count = len(list(chain.get_atoms()))
        print(summary % (name, chain_id, residue_count, atom_count))
# Print the per-chain summary for each of the three parsed structures.
for pdb_code, structure in (('1TUP', p53_1tup),
                            ('1OLG', p53_1olg),
                            ('1YCQ', p53_1ycq)):
    describe_model(pdb_code, structure)
# We will go deeper into this in a later recipe (bottom-up).
1TUP - Chain: E. Number of residues: 43. Number of atoms: 442. 1TUP - Chain: F. Number of residues: 35. Number of atoms: 449. 1TUP - Chain: A. Number of residues: 395. Number of atoms: 1734. 1TUP - Chain: B. Number of residues: 265. Number of atoms: 1593. 1TUP - Chain: C. Number of residues: 276. Number of atoms: 1610. 1OLG - Chain: A. Number of residues: 42. Number of atoms: 698. 1OLG - Chain: B. Number of residues: 42. Number of atoms: 698. 1OLG - Chain: C. Number of residues: 42. Number of atoms: 698. 1OLG - Chain: D. Number of residues: 42. Number of atoms: 698. 1YCQ - Chain: A. Number of residues: 123. Number of atoms: 741. 1YCQ - Chain: B. Number of residues: 16. Number of atoms: 100.
# Print the id of every 1TUP residue whose first id field is neither ' '
# nor 'W' (presumably standard residues and waters - confirm against the
# Bio.PDB residue-id documentation).
for residue in p53_1tup.get_residues():
    hetero_flag = residue.id[0]
    if hetero_flag not in (' ', 'W'):
        print(residue.id)
('H_ ZN', 951, ' ') ('H_ ZN', 952, ' ') ('H_ ZN', 953, ' ')
# Take the first residue yielded for chain 'A' of the structure's item 0,
# then list each of its atoms with serial number and element.
chain_a = p53_1tup[0]['A']
res = next(chain_a.get_residues())
print(res)
for atom in res:
    print(atom, atom.serial_number, atom.element)
# Direct lookup: residue key 94, then atom name 'CA', on the same chain.
print(chain_a[94]['CA'])
<Residue SER het= resseq=94 icode= > <Atom N> 858 N <Atom CA> 859 C <Atom C> 860 C <Atom O> 861 O <Atom CB> 862 C <Atom OG> 863 O <Atom CA>
from Bio.SeqIO import PdbIO, FastaIO
def get_fasta(pdb_file, fasta_file, transfer_ids=None):
    """Copy SEQRES records from a PDB file handle into a FASTA file handle.

    Records with an empty sequence are always skipped. When ``transfer_ids``
    is given, only records whose ``id`` is contained in it are kept. Each
    kept record is echoed to standard output (id, sequence, length) and
    written through a ``FastaIO.FastaWriter``.

    Args:
        pdb_file: Handle passed to ``PdbIO.PdbSeqresIterator``.
        fasta_file: Handle passed to ``FastaIO.FastaWriter``.
        transfer_ids: Optional container of record ids to keep; ``None``
            keeps every non-empty record.
    """
    writer = FastaIO.FastaWriter(fasta_file)
    writer.write_header()
    for record in PdbIO.PdbSeqresIterator(pdb_file):
        seq_length = len(record.seq)
        if seq_length == 0:
            continue
        # The id is only examined once the sequence is known to be non-empty.
        wanted = transfer_ids is None or record.id in transfer_ids
        if wanted:
            print(record.id, record.seq, seq_length)
            writer.write_record(record)
# Extract chain B of each structure into its own FASTA file.
# Each entry is (input PDB file, output FASTA file, record id to keep).
fasta_jobs = (
    ('pdb1tup.ent', '1tup.fasta', '1TUP:B'),
    ('pdb1olg.ent', '1olg.fasta', '1OLG:B'),
    ('pdb1ycq.ent', '1ycq.fasta', '1YCQ:B'),
)
for pdb_path, fasta_path, record_id in fasta_jobs:
    # Context managers guarantee both handles are closed (and the FASTA
    # output flushed to disk) even if get_fasta raises.
    with open(pdb_path) as pdb_handle, open(fasta_path, 'w') as fasta_handle:
        get_fasta(pdb_handle, fasta_handle, transfer_ids=[record_id])
1TUP:B SSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNT 219 1OLG:B KKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG 42 1YCQ:B PLSQETFSDLWKLLPEN 17