A while ago there was a question on Twitter about highlighting bonds which changed in a reaction. I put together a quick bit of example code to answer that question and made a note to do a blog post on the topic. I'm finally getting around to doing that blog post.
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdChemReactions
import rdkit
print(rdkit.__version__)
2021.09.2
Here's something similar to the reaction from the question:
rxn1 = rdChemReactions.ReactionFromRxnBlock('''$RXN
Mrv2102 111820212128
2 1
$MOL
Mrv2102 11182121282D
13 13 0 0 0 0 999 V2000
-7.5723 2.6505 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0
-6.8579 2.2380 0.0000 O 0 0 0 0 0 0 0 0 0 2 0 0
-6.8580 1.4130 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0
-6.1435 1.0004 0.0000 O 0 0 0 0 0 0 0 0 0 4 0 0
-7.5725 1.0005 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0
-7.5725 0.1755 0.0000 N 0 0 0 0 0 0 0 0 0 6 0 0
-8.2869 -0.2369 0.0000 C 0 0 0 0 0 0 0 0 0 7 0 0
-8.2870 -1.0620 0.0000 C 0 0 0 0 0 0 0 0 0 8 0 0
-9.0015 -1.4745 0.0000 C 0 0 0 0 0 0 0 0 0 9 0 0
-9.0015 -2.2995 0.0000 C 0 0 0 0 0 0 0 0 0 10 0 0
-8.2870 -2.7120 0.0000 C 0 0 0 0 0 0 0 0 0 11 0 0
-7.5726 -2.2995 0.0000 C 0 0 0 0 0 0 0 0 0 12 0 0
-7.5726 -1.4745 0.0000 C 0 0 0 0 0 0 0 0 0 13 0 0
1 2 1 0 0 0 0
2 3 1 0 0 0 0
3 4 2 0 0 0 0
3 5 1 0 0 0 0
5 6 1 0 0 0 0
6 7 2 0 0 0 0
7 8 1 0 0 0 0
8 9 1 0 0 0 0
8 13 2 0 0 0 0
9 10 2 0 0 0 0
10 11 1 0 0 0 0
11 12 2 0 0 0 0
12 13 1 0 0 0 0
M END
$MOL
Mrv2102 11182121282D
12 11 0 0 0 0 999 V2000
-3.7934 0.7703 0.0000 C 0 0 0 0 0 0 0 0 0 14 0 0
-3.0790 1.1828 0.0000 C 0 0 0 0 0 0 0 0 0 15 0 0
-2.3645 0.7703 0.0000 C 0 0 0 0 0 0 0 0 0 16 0 0
-3.7934 -0.0547 0.0000 C 0 0 0 0 0 0 0 0 0 17 0 0
-4.5078 -0.4672 0.0000 O 0 0 0 0 0 0 0 0 0 18 0 0
-3.0789 -0.4671 0.0000 O 0 0 0 0 0 0 0 0 0 19 0 0
-1.6500 1.1828 0.0000 O 0 0 0 0 0 0 0 0 0 20 0 0
-2.3645 -0.0547 0.0000 O 0 0 0 0 0 0 0 0 0 21 0 0
-3.0788 -1.2922 0.0000 C 0 0 0 0 0 0 0 0 0 22 0 0
-1.6500 -0.4672 0.0000 C 0 0 0 0 0 0 0 0 0 23 0 0
-2.3644 -1.7046 0.0000 C 0 0 0 0 0 0 0 0 0 24 0 0
-1.6500 -1.2922 0.0000 C 0 0 0 0 0 0 0 0 0 25 0 0
1 2 2 0 0 0 0
1 4 1 0 0 0 0
2 3 1 0 0 0 0
3 7 2 0 0 0 0
3 8 1 0 0 0 0
4 5 2 0 0 0 0
4 6 1 0 0 0 0
6 9 1 0 0 0 0
8 10 1 0 0 0 0
9 11 1 0 0 0 0
10 12 1 0 0 0 0
M END
$MOL
Mrv2102 11182121282D
25 26 0 0 0 0 999 V2000
5.1328 0.9532 0.0000 C 0 0 0 0 0 0 0 0 0 5 0 0
5.8002 0.4683 0.0000 N 0 0 0 0 0 0 0 0 0 6 0 0
5.5453 -0.3163 0.0000 C 0 0 0 0 0 0 0 0 0 7 0 0
4.7203 -0.3163 0.0000 C 0 0 0 0 0 0 0 0 0 14 0 0
4.4654 0.4683 0.0000 C 0 0 0 0 0 0 0 0 0 15 0 0
5.1328 1.7782 0.0000 C 0 0 0 0 0 0 0 0 0 3 0 0
3.6807 0.7232 0.0000 C 0 0 0 0 0 0 0 0 0 16 0 0
4.2354 -0.9838 0.0000 C 0 0 0 0 0 0 0 0 0 17 0 0
6.0302 -0.9838 0.0000 C 0 0 0 0 0 0 0 0 0 8 0 0
6.8507 -0.8975 0.0000 C 0 0 0 0 0 0 0 0 0 9 0 0
7.3356 -1.5650 0.0000 C 0 0 0 0 0 0 0 0 0 10 0 0
7.0001 -2.3187 0.0000 C 0 0 0 0 0 0 0 0 0 11 0 0
6.1796 -2.4049 0.0000 C 0 0 0 0 0 0 0 0 0 12 0 0
5.6947 -1.7375 0.0000 C 0 0 0 0 0 0 0 0 0 13 0 0
3.4149 -0.8975 0.0000 O 0 0 0 0 0 0 0 0 0 18 0 0
4.5709 -1.7375 0.0000 O 0 0 0 0 0 0 0 0 0 19 0 0
4.0860 -2.4049 0.0000 C 0 0 0 0 0 0 0 0 0 22 0 0
3.2655 -2.3187 0.0000 C 0 0 0 0 0 0 0 0 0 24 0 0
3.5092 1.5302 0.0000 O 0 0 0 0 0 0 0 0 0 20 0 0
3.0676 0.1712 0.0000 O 0 0 0 0 0 0 0 0 0 21 0 0
2.2830 0.4261 0.0000 C 0 0 0 0 0 0 0 0 0 23 0 0
1.6699 -0.1259 0.0000 C 0 0 0 0 0 0 0 0 0 25 0 0
5.8473 2.1907 0.0000 O 0 0 0 0 0 0 0 0 0 4 0 0
4.4183 2.1907 0.0000 O 0 0 0 0 0 0 0 0 0 2 0 0
4.4183 3.0157 0.0000 C 0 0 0 0 0 0 0 0 0 1 0 0
1 2 1 0 0 0 0
2 3 1 0 0 0 0
3 4 1 0 0 0 0
4 5 1 0 0 0 0
1 5 1 0 0 0 0
1 6 1 0 0 0 0
5 7 1 0 0 0 0
4 8 1 0 0 0 0
3 9 1 0 0 0 0
10 11 2 0 0 0 0
11 12 1 0 0 0 0
12 13 2 0 0 0 0
13 14 1 0 0 0 0
9 10 1 0 0 0 0
9 14 2 0 0 0 0
8 15 2 0 0 0 0
8 16 1 0 0 0 0
16 17 1 0 0 0 0
17 18 1 0 0 0 0
7 19 2 0 0 0 0
7 20 1 0 0 0 0
20 21 1 0 0 0 0
21 22 1 0 0 0 0
6 23 2 0 0 0 0
6 24 1 0 0 0 0
24 25 1 0 0 0 0
M END
''')
Let's take a look at the reaction:
IPythonConsole.molSize = (600,250)
IPythonConsole.highlightByReactant = True
rxn1
We can ask the reaction to tell us which atoms in the reactants are modified in the reaction:
rxn1.Initialize()
rxn1.GetReactingAtoms()
((4, 5, 6), (0, 1))
The information about which atoms react is enough to figure out which bonds change, but we have to do some additional work for this:
from collections import namedtuple
AtomInfo = namedtuple('AtomInfo',('mapnum','reactant','reactantAtom','product','productAtom'))
def map_reacting_atoms_to_products(rxn,reactingAtoms):
''' figures out which atoms in the products each mapped atom in the reactants maps to '''
res = []
for ridx,reacting in enumerate(reactingAtoms):
reactant = rxn.GetReactantTemplate(ridx)
for raidx in reacting:
mapnum = reactant.GetAtomWithIdx(raidx).GetAtomMapNum()
foundit=False
for pidx,product in enumerate(rxn.GetProducts()):
for paidx,patom in enumerate(product.GetAtoms()):
if patom.GetAtomMapNum()==mapnum:
res.append(AtomInfo(mapnum,ridx,raidx,pidx,paidx))
foundit = True
break
if foundit:
break
return res
def get_mapped_neighbors(atom):
''' test all mapped neighbors of a mapped atom'''
res = {}
amap = atom.GetAtomMapNum()
if not amap:
return res
for nbr in atom.GetNeighbors():
nmap = nbr.GetAtomMapNum()
if nmap:
if amap>nmap:
res[(nmap,amap)] = (atom.GetIdx(),nbr.GetIdx())
else:
res[(amap,nmap)] = (nbr.GetIdx(),atom.GetIdx())
return res
BondInfo = namedtuple('BondInfo',('product','productAtoms','productBond','status'))
def find_modifications_in_products(rxn):
''' returns a 2-tuple with the modified atoms and bonds from the reaction '''
reactingAtoms = rxn.GetReactingAtoms()
amap = map_reacting_atoms_to_products(rxn,reactingAtoms)
res = []
seen = set()
# this is all driven from the list of reacting atoms:
for _,ridx,raidx,pidx,paidx in amap:
reactant = rxn.GetReactantTemplate(ridx)
ratom = reactant.GetAtomWithIdx(raidx)
product = rxn.GetProductTemplate(pidx)
patom = product.GetAtomWithIdx(paidx)
rnbrs = get_mapped_neighbors(ratom)
pnbrs = get_mapped_neighbors(patom)
for tpl in pnbrs:
pbond = product.GetBondBetweenAtoms(*pnbrs[tpl])
if (pidx,pbond.GetIdx()) in seen:
continue
seen.add((pidx,pbond.GetIdx()))
if not tpl in rnbrs:
# new bond in product
res.append(BondInfo(pidx,pnbrs[tpl],pbond.GetIdx(),'New'))
else:
# present in both reactants and products, check to see if it changed
rbond = reactant.GetBondBetweenAtoms(*rnbrs[tpl])
if rbond.GetBondType()!=pbond.GetBondType():
res.append(BondInfo(pidx,pnbrs[tpl],pbond.GetIdx(),'Changed'))
return amap,res
Let's look at what that function returns for our reaction:
atms,bnds = find_modifications_in_products(rxn1)
print(atms)
print(bnds)
[AtomInfo(mapnum=5, reactant=0, reactantAtom=4, product=0, productAtom=0), AtomInfo(mapnum=6, reactant=0, reactantAtom=5, product=0, productAtom=1), AtomInfo(mapnum=7, reactant=0, reactantAtom=6, product=0, productAtom=2), AtomInfo(mapnum=14, reactant=1, reactantAtom=0, product=0, productAtom=3), AtomInfo(mapnum=15, reactant=1, reactantAtom=1, product=0, productAtom=4)] [BondInfo(product=0, productAtoms=(4, 0), productBond=4, status='New'), BondInfo(product=0, productAtoms=(2, 1), productBond=1, status='Changed'), BondInfo(product=0, productAtoms=(3, 2), productBond=2, status='New'), BondInfo(product=0, productAtoms=(4, 3), productBond=3, status='Changed')]
Finally, define the funciton which we'll use to draw the product molecule with highlights shown for bonds and atoms involved in the reaction:
from IPython.display import Image
def draw_product_with_modified_bonds(rxn,atms,bnds,productIdx=None,showAtomMaps=False):
if productIdx is None:
pcnts = [x.GetNumAtoms() for x in rxn.GetProducts()]
largestProduct = list(sorted(zip(pcnts,range(len(pcnts))),reverse=True))[0][1]
productIdx = largestProduct
d2d = Draw.rdMolDraw2D.MolDraw2DCairo(350,300)
pmol = Chem.Mol(rxn.GetProductTemplate(productIdx))
Chem.SanitizeMol(pmol)
if not showAtomMaps:
for atom in pmol.GetAtoms():
atom.SetAtomMapNum(0)
bonds_to_highlight=[]
highlight_bond_colors={}
atoms_seen = set()
for binfo in bnds:
if binfo.product==productIdx and binfo.status=='New':
bonds_to_highlight.append(binfo.productBond)
atoms_seen.update(binfo.productAtoms)
highlight_bond_colors[binfo.productBond] = (1,.4,.4)
if binfo.product==productIdx and binfo.status=='Changed':
bonds_to_highlight.append(binfo.productBond)
atoms_seen.update(binfo.productAtoms)
highlight_bond_colors[binfo.productBond] = (.4,.4,1)
atoms_to_highlight=set()
for ainfo in atms:
if ainfo.product != productIdx or ainfo.productAtom in atoms_seen:
continue
atoms_to_highlight.add(ainfo.productAtom)
d2d.drawOptions().useBWAtomPalette()
d2d.drawOptions().continuousHighlight=False
d2d.drawOptions().highlightBondWidthMultiplier = 24
d2d.drawOptions().setHighlightColour((.9,.9,0))
d2d.drawOptions().fillHighlights=False
atoms_to_highlight.update(atoms_seen)
d2d.DrawMolecule(pmol,highlightAtoms=atoms_to_highlight,highlightBonds=bonds_to_highlight,
highlightBondColors=highlight_bond_colors)
d2d.FinishDrawing()
return d2d.GetDrawingText()
Now we can draw the highlighted product molecule:
Image(draw_product_with_modified_bonds(rxn1,atms,bnds))
Let's look at some other reactions. I will use the SI data from
import pandas as pd
df = pd.read_csv('../data/reaction_data_ci6b00564/dataSetB.csv')
df.head()
rxn_Class | patentID | rxnSmiles_Mapping_NameRxn | reactantSet_NameRxn | NameRxn_Mapping_Complete | rxnSmiles_Mapping_IndigoTK | reactantSet_IndigoTK | IndigoTK_Mapping_Complete | rxnSmiles_IndigoAutoMapperKNIME | reactantSet_IndigoAutoMapperKNIME | IndigoAutoMapperKNIME_Mapping_Complete | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 6 | US05849732 | C.CCCCCC.CO.O=C(OCc1ccccc1)[NH:1][CH2:2][CH2:3... | set([3, 4]) | True | C(OC([NH:11][CH2:12][CH2:13][CH2:14][CH2:15][C... | set([0, 2]) | True | C.CCCCCC.CO.[CH3:10][O:11][C:12]([C@@H:14]([NH... | set([3, 4]) | True |
1 | 2 | US20120114765A1 | O[C:1](=[O:2])[c:3]1[cH:4][c:5]([N+:6](=[O:7])... | set([0, 1]) | True | [Cl:1][c:2]1[cH:3][n:4][cH:5][c:6]([Cl:20])[c:... | set([0, 1]) | True | [NH2:1][c:2]1[c:11]2[c:6]([cH:7][n:8][cH:9][cH... | set([0, 1]) | True |
2 | 1 | US08003648B2 | Cl.O=[CH:1][c:2]1[cH:3][cH:4][c:5](-[c:6]2[n:7... | set([1, 3]) | True | [CH2:1]([NH:3][CH2:4][CH3:5])[CH3:2].C([BH3-])... | set([0, 3]) | True | [CH3:1][CH2:2][NH:3][CH2:4][CH3:5].[CH3:6][c:7... | set([0, 1]) | True |
3 | 1 | US09045475B2 | CC(=O)O[BH-](OC(C)=O)OC(C)=O.ClCCl.O=[C:1]([CH... | set([2, 3]) | True | [nH:1]1[c:5]2[n:6][cH:7][c:8]([O:10][c:11]3[cH... | set([0, 3]) | True | CC(O[BH-](OC(=O)C)OC(=O)C)=O.[CH3:14][C:15]1([... | set([1, 3]) | True |
4 | 2 | US08188098B2 | CCN(C(C)C)C(C)C.ClCCl.Cl[C:1](=[O:2])[O:3][CH:... | set([2, 5]) | True | Cl[C:2]([O:4][CH:5]1[CH2:9][CH2:8][CH2:7][CH2:... | set([0, 2]) | True | CCN(C(C)C)C(C)C.[CH3:10][CH2:11][O:12][c:13]1[... | set([1, 4]) | True |
rxnclass = 1
class_smis = df[df['rxn_Class']==rxnclass].rxnSmiles_Mapping_NameRxn.to_list()
rxn = rdChemReactions.ReactionFromSmarts(class_smis[3],useSmiles=True)
rxn.Initialize()
atms,bnds = find_modifications_in_products(rxn)
print(atms)
print(bnds)
Image(draw_product_with_modified_bonds(rxn,atms,bnds))
[AtomInfo(mapnum=1, reactant=3, reactantAtom=1, product=0, productAtom=0), AtomInfo(mapnum=7, reactant=4, reactantAtom=0, product=0, productAtom=6)] [BondInfo(product=0, productAtoms=(6, 0), productBond=5, status='New')]
rxnclass = 4
class_smis = df[df['rxn_Class']==rxnclass].rxnSmiles_Mapping_NameRxn.to_list()
rxn = rdChemReactions.ReactionFromSmarts(class_smis[1],useSmiles=True)
rxn.Initialize()
atms,bnds = find_modifications_in_products(rxn)
print(atms)
print(bnds)
Image(draw_product_with_modified_bonds(rxn,atms,bnds))
[AtomInfo(mapnum=1, reactant=1, reactantAtom=1, product=0, productAtom=0), AtomInfo(mapnum=2, reactant=1, reactantAtom=2, product=0, productAtom=9), AtomInfo(mapnum=16, reactant=2, reactantAtom=0, product=0, productAtom=18), AtomInfo(mapnum=17, reactant=2, reactantAtom=1, product=0, productAtom=16), AtomInfo(mapnum=19, reactant=2, reactantAtom=3, product=0, productAtom=15)] [BondInfo(product=0, productAtoms=(9, 0), productBond=8, status='Changed'), BondInfo(product=0, productAtoms=(18, 0), productBond=18, status='New'), BondInfo(product=0, productAtoms=(15, 9), productBond=14, status='New'), BondInfo(product=0, productAtoms=(16, 18), productBond=17, status='Changed'), BondInfo(product=0, productAtoms=(15, 16), productBond=15, status='Changed')]
Look at an example where there are no changed bonds in the products but where there is a changed atom:
rxnclass = 7
class_smis = df[df['rxn_Class']==rxnclass].rxnSmiles_Mapping_NameRxn.to_list()
rxn = rdChemReactions.ReactionFromSmarts(class_smis[1],useSmiles=True)
rxn.Initialize()
atms,bnds = find_modifications_in_products(rxn)
print(atms)
print(bnds)
Image(draw_product_with_modified_bonds(rxn,atms,bnds))
[AtomInfo(mapnum=1, reactant=1, reactantAtom=1, product=0, productAtom=0)] []