In [1]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import RDConfig
import os
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
In [2]:
cdk2mols = [x for x in Chem.SDMolSupplier('data/cdk2.sdf',removeHs=False) if x is not None]
In [3]:
cdk2mols[0]
Out[3]:
In [4]:
cdk2mols[1]
Out[4]:

PyMol integration

For the below code to work, you need to have PyMol running with the XML-RPC server active (-R argument to PyMol on launch)

In [5]:
from rdkit.Chem import PyMol
v = PyMol.MolViewer()
In [6]:
v.ShowMol(cdk2mols[0])
v.GetPNG(h=300)
Out[6]:
In [7]:
v.ShowMol(cdk2mols[1],name=cdk2mols[1].GetProp('_Name'))   # <- this replaces the current molecule
v.GetPNG(h=300)
Out[7]:
In [8]:
v.ShowMol(cdk2mols[2],showOnly=False,name=cdk2mols[2].GetProp('_Name'))   # <- this replaces the current molecule
v.GetPNG(h=300)
Out[8]:

Conformers and Conformation Generation

In the RDKit atom positions are not stored on the atoms themselves. They are associated with Conformer objects stored with molecules.

Molecules do not have to have conformers, and they can have more than one.

In [9]:
import copy
tm = copy.deepcopy(cdk2mols[0])
tm.RemoveAllConformers()
tm.GetNumConformers()
Out[9]:
0

Now use the RDKit's distance-geometry conformation generator to add some conformers:

In [10]:
cids=AllChem.EmbedMultipleConfs(tm,pruneRmsThresh=1.0)

Clean the conformations up using the built-in UFF implementation:

In [11]:
for cid in cids: AllChem.UFFOptimizeMolecule(tm,confId=cid)
In [12]:
v.ShowMol(tm,name='conf-0',confId=cids[0])
v.GetPNG(h=300)
Out[12]:
In [13]:
v.ShowMol(tm,name='conf-1',confId=cids[1],showOnly=False)
v.GetPNG(h=300)
Out[13]:

hmm, that doesn't help to see the differences. Align the conformations on the core:

In [14]:
tm
Out[14]:
In [15]:
core = Chem.MolFromSmarts('c12ncnc1ncnc2')
match=tm.GetSubstructMatch(core)
tm
Out[15]:
In [16]:
match
Out[16]:
(8, 12, 11, 10, 9, 13, 14, 15, 7)
In [17]:
AllChem.AlignMolConformers(tm,atomIds=match)
In [18]:
v.ShowMol(tm,name='conf-1',confId=cids[1],showOnly=False)  # <- the same name we already used, so PyMol replaces the object
v.GetPNG(h=300)
Out[18]:

ok, that works; show all the conformations

In [19]:
v.DeleteAll()
for cid in cids: 
    v.ShowMol(tm,confId=cid,name='Conf-%d'%cid,showOnly=False)
v.GetPNG(h=300)
Out[19]:

What about getting atom positions?

In [20]:
conf = tm.GetConformer()   # <- gives the default (first) conformer
conf.GetAtomPosition(0)
Out[20]:
<rdkit.Geometry.rdGeometry.Point3D at 0x1081197c0>
In [21]:
list(conf.GetAtomPosition(0))
Out[21]:
[3.0183961749827337, 2.0407200114327901, -0.016277779246558433]

Aligning molecules to each other

In [54]:
m1 = cdk2mols[0]
m2 = cdk2mols[1]
v.ShowMol(m1,name=m1.GetProp('_Name'))
v.ShowMol(m2,name=m2.GetProp('_Name'),showOnly=False)
v.GetPNG(h=300)
Out[54]:
In [23]:
core
Out[23]:
In [24]:
match1 = m1.GetSubstructMatch(core)
match2= m2.GetSubstructMatch(core)
In [25]:
AllChem.AlignMol(m2,m1,atomMap=zip(match2,match1))   # <- m2 is aligned to m1, return value is the RMSD for the alignment
Out[25]:
0.0035453672774079657
In [26]:
v.ShowMol(m1,name=m1.GetProp('_Name'))
v.ShowMol(m2,name=m2.GetProp('_Name'),showOnly=False)
v.GetPNG(h=300)
Out[26]:

That alignment step has gotten a lot easier with the new MCS code:

In [27]:
from rdkit.Chem import MCS
In [28]:
m1 = cdk2mols[0]
m2 = cdk2mols[2]
m1
Out[28]:
In [29]:
m2
Out[29]:
In [30]:
mcs = MCS.FindMCS([m1,m2],ringMatchesRingOnly=True)
mcs
Out[30]:
MCSResult(numAtoms=19, numBonds=20, smarts='[#6]:1(-!@[#1]):[#7]:[#6]:2:[#6](:[#7]:[#6](-!@[#7](-!@[#1])-!@[#1]):[#7]:[#6]:2-!@[#8]-!@[#6](-!@[#1])(-!@[#1])-!@[#6]):[#7]:1-!@[#1]', completed=1)
In [31]:
core = Chem.MolFromSmarts(mcs.smarts)
core
Out[31]:
In [32]:
match1 = m1.GetSubstructMatch(core)
match2= m2.GetSubstructMatch(core)
AllChem.AlignMol(m2,m1,atomMap=zip(match2,match1))   # <- m2 is aligned to m1
Out[32]:
0.56448382703159583
In [33]:
v.ShowMol(m1,name=m1.GetProp('_Name'))
v.ShowMol(m2,name=m2.GetProp('_Name'),showOnly=False)
v.GetPNG(h=300)
Out[33]:
In [34]:
m1
Out[34]:
In [35]:
m2
Out[35]:

Chemical Features

In [36]:
ffact = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir,'BaseFeatures.fdef'))
In [37]:
ffact.GetFeatureFamilies()
Out[37]:
('Donor',
 'Acceptor',
 'NegIonizable',
 'PosIonizable',
 'ZnBinder',
 'Aromatic',
 'Hydrophobe',
 'LumpedHydrophobe')
In [38]:
m = cdk2mols[0]
feats = ffact.GetFeaturesForMol(m)
[x.GetFamily() for x in feats]
Out[38]:
['Donor',
 'Donor',
 'Donor',
 'Donor',
 'Acceptor',
 'Acceptor',
 'Acceptor',
 'Acceptor',
 'Acceptor',
 'PosIonizable',
 'Aromatic',
 'Aromatic',
 'Hydrophobe',
 'Hydrophobe',
 'Hydrophobe',
 'LumpedHydrophobe']
In [39]:
d1 = feats[0]
In [40]:
d1.GetAtomIds()
Out[40]:
(10,)
In [41]:
list(d1.GetPos())
Out[41]:
[-4.0853999999999999, -1.1212, -0.083099999999999993]
In [42]:
list(m.GetConformer().GetAtomPosition(10))
Out[42]:
[-4.0853999999999999, -1.1212, -0.083099999999999993]

Let's look at the features in 3D.

In [43]:
v.ShowMol(m,name=m.GetProp('_Name'))
Out[43]:
''
In [44]:
donors = ffact.GetFeaturesForMol(m,includeOnly='Donor')
In [45]:
for donor in donors: 
    v.server.sphere(list(donor.GetPos()),0.5,(1,0,1),'donors',1)
v.GetPNG(h=300)
Out[45]:

What's up with those "extra" donors?

In [46]:
defs=ffact.GetFeatureDefs()
In [47]:
sorted(defs.keys())
Out[47]:
['Acceptor.SingleAtomAcceptor',
 'Aromatic.Arom4',
 'Aromatic.Arom5',
 'Aromatic.Arom6',
 'Aromatic.Arom7',
 'Aromatic.Arom8',
 'Donor.SingleAtomDonor',
 'Hydrophobe.ChainTwoWayAttach',
 'Hydrophobe.ThreeWayAttach',
 'LumpedHydrophobe.Nitro2',
 'LumpedHydrophobe.RH3_3',
 'LumpedHydrophobe.RH4_4',
 'LumpedHydrophobe.RH5_5',
 'LumpedHydrophobe.RH6_6',
 'LumpedHydrophobe.iPropyl',
 'LumpedHydrophobe.tButyl',
 'NegIonizable.AcidicGroup',
 'PosIonizable.BasicGroup',
 'PosIonizable.Guanidine',
 'PosIonizable.Imidazole',
 'PosIonizable.PosN',
 'ZnBinder.ZnBinder1',
 'ZnBinder.ZnBinder2',
 'ZnBinder.ZnBinder3',
 'ZnBinder.ZnBinder4',
 'ZnBinder.ZnBinder5',
 'ZnBinder.ZnBinder6']
In [48]:
defs['Donor.SingleAtomDonor']
Out[48]:
'[$([N&!H0&v3,N&!H0&+1&v4,n&H1&+0,$([$([Nv3](-C)(-C)-C)]),$([$(n[n;H1]),$(nc[n;H1])])]),$([O,S;H1;+0])]'

The feature definitions include some tautomer recognition:

[$([N&!H0&v3,
     N&!H0&+1&v4,
     n&H1&+0,
     $([$([Nv3](-C)(-C)-C)]),
     $([$(n[n;H1]),      # <- here
     $(nc[n;H1])])]),    # <- and here
 $([O,S;H1;+0])]
In [49]:
for feat in ffact.GetFeaturesForMol(m,includeOnly='PosIonizable'): 
    v.server.sphere(list(feat.GetPos()),0.5,(1,0,0),'Pos',1) 
v.GetPNG(h=300)
Out[49]:
In [50]:
defs['PosIonizable.Imidazole']
Out[50]:
'c1ncnc1'

There's a command-line tool to show all the features and some additional information.

Unfortunately this weems to currently only work as a command-line tool, not via import and direct function calls.

In [60]:
print >>file('data/output.mol','w+'),Chem.MolToMolBlock(m)
In [61]:
!python $RDBASE/rdkit/Chem/Features/ShowFeats.py data/output.mol
In [62]:
v.GetPNG()
Out[62]:

Constrained Conformer Generation

Generate a conformation (or conformations) of a molecule where some atoms have a fixed geometry. For example: explore different sidechain conformations with a fixed core.

In [51]:
cdk2mols[0]
Out[51]:
In [63]:
cdk2mols[1]
Out[63]:
In [64]:
core = Chem.MolFromSmarts('c12ncn([#1])c1nc(N)nc2OC')
core
Out[64]:
In [65]:
em = Chem.EditableMol(cdk2mols[0])
match = cdk2mols[0].GetSubstructMatch(core)
for idx in range(cdk2mols[0].GetNumAtoms()-1,-1,-1): 
    if idx not in match:
        em.RemoveAtom(idx)
coreMol = em.GetMol()
Chem.SanitizeMol(coreMol)
coreMol
Out[65]:
In [66]:
newMol = Chem.MolFromSmiles('c12nc[nH]c1nc(N)nc2OCc1cc(C)ccc1')
newMol
Out[66]:
In [67]:
newMol=Chem.AddHs(newMol)
newMol3D=AllChem.ConstrainedEmbed(newMol,coreMol)
In [68]:
v.ShowMol(cdk2mols[0],name='reference')
v.ShowMol(newMol3D,name='new mol',showOnly=False)
v.GetPNG(h=300)
Out[68]:

Something completely different: shape-based alignment

Python implementation of the subshape alignment algorithm: Putta S, Eksterowicz J, Lemmen C, Stanton R (2003) J Chem Inf Comput Sci 43:1623

In [69]:
from rdkit.Chem.Subshape import SubshapeBuilder,SubshapeAligner,SubshapeObjects
In [70]:
Draw.MolsToGridImage(cdk2mols[:20],molsPerRow=4)
Out[70]:
In [71]:
ref = Chem.Mol(cdk2mols[0].ToBinary())
probe = Chem.Mol(cdk2mols[1].ToBinary())
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
Out[71]:
In [72]:
AllChem.CanonicalizeConformer(ref.GetConformer())
builder = SubshapeBuilder.SubshapeBuilder()
builder.gridDims=(20.,20.,10)
builder.gridSpacing=0.5
builder.winRad=4.

refShape = builder.GenerateSubshapeShape(ref)
probeShape = builder(probe,terminalPointsOnly=True)
generate all possible
Compute centroids: 1149
remove points: 1149

In [73]:
v.ShowMol(ref,name='ref')
SubshapeObjects.DisplaySubshape(v,refShape,'ref_shape')
v.server.do('set transparency=0.5')
v.GetPNG()
Out[73]:
In [74]:
aligner = SubshapeAligner.SubshapeAligner()

algs = aligner.GetSubshapeAlignments(ref,refShape,probe,probeShape,builder)
algs
Out[74]:
[<rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e5090>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ab350>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ab590>]
In [75]:
alg = algs[0]
AllChem.TransformMol(probe,alg.transform)
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
Out[75]:
In [76]:
probe = Chem.Mol(cdk2mols[8].ToBinary())
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
Out[76]:
In [77]:
probeShape = builder(probe,terminalPointsOnly=True)
aligner = SubshapeAligner.SubshapeAligner()

algs = aligner.GetSubshapeAlignments(ref,refShape,probe,probeShape,builder)
algs
Out[77]:
[<rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a47d0>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4650>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4450>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4250>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081a4110>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082ce990>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x105a99890>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca9d0>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081caf90>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081caf50>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081cadd0>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca7d0>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081ca4d0>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081abed0>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1081aba50>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e5cd0>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e3650>,
 <rdkit.Chem.Subshape.SubshapeAligner.SubshapeAlignment at 0x1082e3410>]
In [78]:
alg = algs[0]
AllChem.TransformMol(probe,alg.transform)
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
Out[78]:
In [79]:
probe = Chem.Mol(cdk2mols[8].ToBinary())
alg = algs[1]
AllChem.TransformMol(probe,alg.transform)
v.ShowMol(ref,name='ref')
v.ShowMol(probe,name='probe',showOnly=False)
v.GetPNG()
Out[79]: