from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDConfig
import os
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
cdk2mols = [x for x in Chem.SDMolSupplier('data/cdk2.sdf',removeHs=False) if x is not None]
cdk2mols[0]
cdk2mols[1]
For the below code to work, you need to have PyMol running with the XML-RPC server active (-R
argument to PyMol on launch)
from rdkit.Chem import PyMol
v = PyMol.MolViewer()
v.ShowMol(cdk2mols[0])
v.GetPNG(h=300)
v.ShowMol(cdk2mols[1],name=cdk2mols[1].GetProp('_Name')) # <- this replaces the current molecule
v.GetPNG(h=300)
v.ShowMol(cdk2mols[2],showOnly=False,name=cdk2mols[2].GetProp('_Name')) # <- this replaces the current molecule
v.GetPNG(h=300)
In the RDKit atom positions are not stored on the atoms themselves. They are associated with Conformer
objects stored with molecules.
Molecules do not have to have conformers, and they can have more than one.
import copy
tm = copy.deepcopy(cdk2mols[0])
tm.RemoveAllConformers()
tm.GetNumConformers()
0
Now use the RDKit's distance-geometry conformation generator to add some conformers:
cids=AllChem.EmbedMultipleConfs(tm,pruneRmsThresh=1.0)
Clean the conformations up using the built-in UFF implementation:
for cid in cids: AllChem.UFFOptimizeMolecule(tm,confId=cid)
v.ShowMol(tm,name='conf-0',confId=cids[0])
v.GetPNG(h=300)
v.ShowMol(tm,name='conf-1',confId=cids[1],showOnly=False)
v.GetPNG(h=300)
hmm, that doesn't help to see the differences. Align the conformations on the core:
tm
core = Chem.MolFromSmarts('c12ncnc1ncnc2')
match=tm.GetSubstructMatch(core)
tm
match
(8, 12, 11, 10, 9, 13, 14, 15, 7)
AllChem.AlignMolConformers(tm,atomIds=match)
v.ShowMol(tm,name='conf-1',confId=cids[1],showOnly=False) # <- the same name we already used, so PyMol replaces the object
v.GetPNG(h=300)
ok, that works; show all the conformations
v.DeleteAll()
for cid in cids:
v.ShowMol(tm,confId=cid,name='Conf-%d'%cid,showOnly=False)
v.GetPNG(h=300)
What about getting atom positions?
conf = tm.GetConformer() # <- gives the default (first) conformer
conf.GetAtomPosition(0)
<rdkit.Geometry.rdGeometry.Point3D at 0x1081197c0>
list(conf.GetAtomPosition(0))
[3.0183961749827337, 2.0407200114327901, -0.016277779246558433]
m1 = cdk2mols[0]
m2 = cdk2mols[1]
v.ShowMol(m1,name=m1.GetProp('_Name'))
v.ShowMol(m2,name=m2.GetProp('_Name'),showOnly=False)
v.GetPNG(h=300)
core
match1 = m1.GetSubstructMatch(core)
match2= m2.GetSubstructMatch(core)
AllChem.AlignMol(m2,m1,atomMap=zip(match2,match1)) # <- m2 is aligned to m1, return value is the RMSD for the alignment
0.0035453672774079657
v.ShowMol(m1,name=m1.GetProp('_Name'))
v.ShowMol(m2,name=m2.GetProp('_Name'),showOnly=False)
v.GetPNG(h=300)
That alignment step has gotten a lot easier with the new MCS code:
from rdkit.Chem import MCS
m1 = cdk2mols[0]
m2 = cdk2mols[2]
m1
m2
mcs = MCS.FindMCS([m1,m2],ringMatchesRingOnly=True)
mcs
MCSResult(numAtoms=19, numBonds=20, smarts='[#6]:1(-!@[#1]):[#7]:[#6]:2:[#6](:[#7]:[#6](-!@[#7](-!@[#1])-!@[#1]):[#7]:[#6]:2-!@[#8]-!@[#6](-!@[#1])(-!@[#1])-!@[#6]):[#7]:1-!@[#1]', completed=1)
core = Chem.MolFromSmarts(mcs.smarts)
core
match1 = m1.GetSubstructMatch(core)
match2= m2.GetSubstructMatch(core)
AllChem.AlignMol(m2,m1,atomMap=zip(match2,match1)) # <- m2 is aligned to m1
0.56448382703159583
v.ShowMol(m1,name=m1.GetProp('_Name'))
v.ShowMol(m2,name=m2.GetProp('_Name'),showOnly=False)
v.GetPNG(h=300)
m1
m2
ffact = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef'))
ffact.GetFeatureFamilies()
('Donor', 'Acceptor', 'NegIonizable', 'PosIonizable', 'ZnBinder', 'Aromatic', 'Hydrophobe', 'LumpedHydrophobe')
m = cdk2mols[0]
feats = ffact.GetFeaturesForMol(m)
[x.GetFamily() for x in feats]
['Donor', 'Donor', 'Donor', 'Donor', 'Acceptor', 'Acceptor', 'Acceptor', 'Acceptor', 'Acceptor', 'PosIonizable', 'Aromatic', 'Aromatic', 'Hydrophobe', 'Hydrophobe', 'Hydrophobe', 'LumpedHydrophobe']
d1 = feats[0]
d1.GetAtomIds()
(10,)
list(d1.GetPos())
[-4.0853999999999999, -1.1212, -0.083099999999999993]
list(m.GetConformer().GetAtomPosition(10))
[-4.0853999999999999, -1.1212, -0.083099999999999993]
Let's look at the features in 3D.
v.ShowMol(m,name=m.GetProp('_Name'))
''
donors = ffact.GetFeaturesForMol(m,includeOnly='Donor')
for donor in donors:
v.server.sphere(list(donor.GetPos()),0.5,(1,0,1),'donors',1)
v.GetPNG(h=300)
What's up with those "extra" donors?
defs=ffact.GetFeatureDefs()
sorted(defs.keys())
['Acceptor.SingleAtomAcceptor', 'Aromatic.Arom4', 'Aromatic.Arom5', 'Aromatic.Arom6', 'Aromatic.Arom7', 'Aromatic.Arom8', 'Donor.SingleAtomDonor', 'Hydrophobe.ChainTwoWayAttach', 'Hydrophobe.ThreeWayAttach', 'LumpedHydrophobe.Nitro2', 'LumpedHydrophobe.RH3_3', 'LumpedHydrophobe.RH4_4', 'LumpedHydrophobe.RH5_5', 'LumpedHydrophobe.RH6_6', 'LumpedHydrophobe.iPropyl', 'LumpedHydrophobe.tButyl', 'NegIonizable.AcidicGroup', 'PosIonizable.BasicGroup', 'PosIonizable.Guanidine', 'PosIonizable.Imidazole', 'PosIonizable.PosN', 'ZnBinder.ZnBinder1', 'ZnBinder.ZnBinder2', 'ZnBinder.ZnBinder3', 'ZnBinder.ZnBinder4', 'ZnBinder.ZnBinder5', 'ZnBinder.ZnBinder6']
defs['Donor.SingleAtomDonor']
'[$([N&!H0&v3,N&!H0&+1&v4,n&H1&+0,$([$([Nv3](-C)(-C)-C)]),$([$(n[n;H1]),$(nc[n;H1])])]),$([O,S;H1;+0])]'
The feature definitions include some tautomer recognition:
[$([N&!H0&v3,
N&!H0&+1&v4,
n&H1&+0,
$([$([Nv3](-C)(-C)-C)]),
$([$(n[n;H1]), # <- here
$(nc[n;H1])])]), # <- and here
$([O,S;H1;+0])]
for feat in ffact.GetFeaturesForMol(m,includeOnly='PosIonizable'):
v.server.sphere(list(feat.GetPos()),0.5,(1,0,0),'Pos',1)
v.GetPNG(h=300)
defs['PosIonizable.Imidazole']
'c1ncnc1'
There's a command-line tool to show all the features and some additional information.
Unfortunately this weems to currently only work as a command-line tool, not via import and direct function calls.
print >>file('data/output.mol','w+'),Chem.MolToMolBlock(m)
!python $RDBASE/rdkit/Chem/Features/ShowFeats.py data/output.mol
v.GetPNG()
Generate a conformation (or conformations) of a molecule where some atoms have a fixed geometry. For example: explore different sidechain conformations with a fixed core.
cdk2mols[0]
cdk2mols[1]
core = Chem.MolFromSmarts('c12ncn([#1])c1nc(N)nc2OC')
core
em = Chem.EditableMol(cdk2mols[0])
match = cdk2mols[0].GetSubstructMatch(core)
for idx in range(cdk2mols[0].GetNumAtoms()-1,-1,-1):
if idx not in match:
em.RemoveAtom(idx)
coreMol = em.GetMol()
Chem.SanitizeMol(coreMol)
coreMol
newMol = Chem.MolFromSmiles('c12nc[nH]c1nc(N)nc2OCc1cc(C)ccc1')
newMol
newMol=Chem.AddHs(newMol)
newMol3D=AllChem.ConstrainedEmbed(newMol,coreMol)
v.ShowMol(cdk2mols[0],name='reference')
v.ShowMol(newMol3D,name='new mol',showOnly=False)
v.GetPNG(h=300)
Python implementation of the subshape alignment algorithm: Putta S, Eksterowicz J, Lemmen C, Stanton R (2003) J Chem Inf Comput Sci 43:1623
from rdkit.Chem.Subshape import SubshapeBuilder,SubshapeAligner,SubshapeObjects
Draw.MolsToGridImage(cdk2mols[:20],molsPerRow=4)
ref = Chem.Mol(cdk2mols[0].ToBinary())
probe = Chem.Mol(cdk2mols[1].ToBinary())
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
AllChem.CanonicalizeConformer(ref.GetConformer())
builder = SubshapeBuilder.SubshapeBuilder()
builder.gridDims=(20.,20.,10)
builder.gridSpacing=0.5
builder.winRad=4.
refShape = builder.GenerateSubshapeShape(ref)
probeShape = builder(probe,terminalPointsOnly=True)
generate all possible Compute centroids: 1149 remove points: 1149
v.ShowMol(ref,name='ref')
SubshapeObjects.DisplaySubshape(v,refShape,'ref_shape')
v.server.do('set transparency=0.5')
v.GetPNG()
aligner = SubshapeAligner.SubshapeAligner()
algs = aligner.GetSubshapeAlignments(ref,refShape,probe,probeShape,builder)
algs
[<rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e5090>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ab350>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ab590>]
alg = algs[0]
AllChem.TransformMol(probe,alg.transform)
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
probe = Chem.Mol(cdk2mols[8].ToBinary())
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
probeShape = builder(probe,terminalPointsOnly=True)
aligner = SubshapeAligner.SubshapeAligner()
algs = aligner.GetSubshapeAlignments(ref,refShape,probe,probeShape,builder)
algs
[<rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a47d0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4650>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4450>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4250>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4110>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082ce990>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x105a99890>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca9d0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081caf90>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081caf50>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081cadd0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca7d0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca4d0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081abed0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081aba50>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e5cd0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e3650>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e3410>]
alg = algs[0]
AllChem.TransformMol(probe,alg.transform)
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
probe = Chem.Mol(cdk2mols[8].ToBinary())
alg = algs[1]
AllChem.TransformMol(probe,alg.transform)
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
probe = Chem.Mol(cdk2mols[8].ToBinary())
alg = algs[2]
AllChem.TransformMol(probe,alg.transform)
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
probe = Chem.Mol(cdk2mols[8].ToBinary())
algs = SubshapeAligner.ClusterAlignments(probe,algs,builder,neighborTol=0.15)
algs
[<rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081aba50>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x105a99890>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e3410>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081cadd0>]
probe = Chem.MolFromSmiles('c12nc[nH]c1nc(N)nc2OC')
probe = Chem.AddHs(probe)
probe
AllChem.EmbedMolecule(probe)
AllChem.UFFOptimizeMolecule(probe)
0
probeShape = builder(probe,terminalPointsOnly=True)
aligner = SubshapeAligner.SubshapeAligner()
algs = aligner.GetSubshapeAlignments(ref,refShape,probe,probeShape,builder)
algs = SubshapeAligner.ClusterAlignments(probe,algs,builder,neighborTol=0.15)
algs
[<rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4090>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081b0390>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081b0690>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081aa710>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081aa350>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a41d0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4e10>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e59d0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca310>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081abc90>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081b0850>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081aa9d0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a49d0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e5490>]
for i,alg in enumerate(algs):
print i,alg.dirMatch,alg.shapeDist,alg.triangleSSD
0 2.66248326482 0.176750330251 8.15896188777 1 2.98012751958 0.155761589404 2.3803356649 2 2.84304320738 0.157294429708 1.21691252773 3 2.94363294614 0.191631130064 1.37025229396 4 2.62804708898 0.174754706974 1.78042433136 5 2.99330988917 0.195744680851 4.73272609537 6 2.74183594084 0.185283218634 3.81173081005 7 2.8610668907 0.16587427364 5.52397719304 8 2.85856686344 0.160444797458 1.78473846848 9 2.67998131271 0.16754756871 0.863973092113 10 2.67921051241 0.181914052201 5.60199801221 11 2.81983801937 0.179730799683 6.64033787519 12 2.8189019846 0.169576719577 3.043602801 13 2.91817082651 0.17642362959 3.03572973243
tprobe = Chem.Mol(probe.ToBinary())
alg = algs[9]
AllChem.TransformMol(tprobe,alg.transform)
v.ShowMol(ref,name='ref')
v.ShowMol(tprobe,name='probe',showOnly=False)
v.GetPNG()
builder2 = SubshapeBuilder.SubshapeBuilder()
builder2.gridDims=(20.,20.,10)
builder2.gridSpacing=0.25
builder2.winRad=4.
refShape2 = builder2.GenerateSubshapeShape(ref)
generate all possible Compute centroids: 9288 remove points: 9288
v.ShowMol(ref,name='ref')
SubshapeObjects.DisplaySubshape(v,refShape2,'ref_shape')
v.server.do('set transparency=0.5')
v.GetPNG()
probeShape = builder(probe,terminalPointsOnly=True)
aligner = SubshapeAligner.SubshapeAligner()
algs = aligner.GetSubshapeAlignments(ref,refShape2,probe,probeShape,builder2)
algs = SubshapeAligner.ClusterAlignments(probe,algs,builder2,neighborTol=0.15)
algs
[<rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081bbe90>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081bb410>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081bb150>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081d2f10>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e5950>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081b05d0>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081bbb50>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x10a380190>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e5410>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e5650>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca250>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x10a380490>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca290>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca790>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081b0310>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081b0c10>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x10a386790>, <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e5050>]
for i,alg in enumerate(algs):
print i,alg.dirMatch,alg.shapeDist,alg.triangleSSD
0 2.93067907851 0.131556319862 3.80706332639 1 2.72024503463 0.136666886849 1.17435133315 2 2.97135588044 0.121871491778 1.51079728618 3 2.92801388536 0.166903620433 5.42110202354 4 2.77495686177 0.197064643799 3.00219534245 5 2.76002050097 0.17465142404 8.92684700179 6 2.69383690672 0.175705991027 4.24576840787 7 2.72268396662 0.186047279451 4.53909519849 8 2.85290369531 0.186858858066 3.65531141688 9 2.78362319207 0.166342123502 6.53275223755 10 2.76779780106 0.170364500792 0.869320096581 11 2.86271178964 0.174185257192 2.27139063046 12 2.77198648417 0.150054403376 2.59959669474 13 2.73553504715 0.179990773084 0.88495699425 14 2.88126161344 0.172028434452 3.43810994161 15 2.89342979568 0.162430683919 5.6857272519 16 2.77521077026 0.193368341094 4.52138323256 17 2.76013966529 0.197285426505 0.650845999818
tprobe = Chem.Mol(probe.ToBinary())
alg = algs[2]
AllChem.TransformMol(tprobe,alg.transform)
v.ShowMol(ref,name='ref')
v.ShowMol(tprobe,name='probe',showOnly=False)
v.GetPNG()