- Generate positional varying molecules in RDkit
- Set up the reactants and reaction definitions
- Single molecule reaction
- Reacting two compounds
The code and discussion in this notebook is inspired and borrowed from:
The Reaction SMARTS or SMIRKS way to query chemical reactions
SMIRKS as per the Daylight definition are used to describe a transform (or reaction) to modify molecules. They are rules to make new molecules but also be used a 'Reaction SMARTS' to search for reactions smiles which match that transformation.
RDKit treats these slightly differently - it has Reaction SMARTS which are used for substructure matching alone.
reaction ::= reactants ">>" products
reactants ::= molecules
products ::= molecules
molecules ::= molecule
molecules "." molecule
molecule ::= a valid SMARTS string without "." characters
"(" a valid SMARTS string without "." characters ")"
Example Reaction SMARTS: | |||
Query: | Target: | Matches: | Comment: |
C>>C | CC>>CC | 4 | No maps, normal match. |
C>>C | [CH3:7][CH3:8]>> [CH3:7][CH3:8] | 4 | No maps in query, maps in target are ignored. |
[C:1]>>C | [CH3:7][CH3:8]>> [CH3:7][CH3:8] | 4 | Unpaired map in query ignored. |
[C:1]>>[C:1] | CC>>CC | 0 | No maps in target, hence no matches. |
[C:?1]>>[C:?1] | CC>>CC | 4 | Query says mapped as shown or not present. |
[C:1]>>[C:1] | [CH3:7][CH3:8]>>[CH3:7][CH3:8] | 2 | Matches for target 7,7 and 8,8 atom pairs. |
[C:1]>>[C:2] | [CH3:7][CH3:8]>> [CH3:7][CH3:8] | 4 | When a query class is not found on both sides of the query, it is ignored; this query does NOT say that the atoms are in different classes. |
[C:1][C:1]>>[C:1] | [CH3:7][CH3:7]>> [CH3:7][CH3:7] | 4 | Atom maps match with "or" logic. All atoms get bound to class 7. |
[C:1][C:1]>>[C:1] | [CH3:7][CH3:8]>> [CH3:7][CH3:8] | 4 | The reactant atoms are bound to classes 7 and 8. Note that having the first query atom bound to class 7 does not preclude binding the second atom. Next, the product atom can bind to classes 7 or 8. |
[C:1][C:1]>>[C:1] | [CH3:7][CH3:7]>> [CH3:7][CH3:8] | 2 | The reactants are bound to class 7. The product atom can bind to class 7 only. |
# Install requirements for the tutorial
!pip install pandas rdkit-pypi mols2grid matplotlib scikit-learn ipywidgets
import os
import pandas as pd
import numpy as np
# RDkit imports
import rdkit
from rdkit import Chem #This gives us most of RDkits's functionality
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole #Needed to show molecules
IPythonConsole.ipython_useSVG=True #SVG's tend to look nicer than the png counterparts
print(rdkit.__version__)
## Enumerator
from rdkit.Chem.Draw import rdDepictor
from rdkit.Chem import rdMolEnumerator
# Mute all errors except critical
Chem.WrapLogs()
lg = rdkit.RDLogger.logger()
lg.setLevel(rdkit.RDLogger.CRITICAL)
from collections import defaultdict
from rdkit.Chem.Draw import rdMolDraw2D
from IPython.display import SVG
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
# High DPI rendering for mac
%config InlineBackend.figure_format = 'retina'
# Plot matplotlib plots with white background:
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
plot_params = {
'font.size' : 15,
'axes.titlesize' : 15,
'axes.labelsize' : 15,
'axes.labelweight' : 'bold',
'xtick.labelsize' : 12,
'ytick.labelsize' : 12,
}
plt.rcParams.update(plot_params)
rxn = AllChem.ReactionFromSmarts('[C:1]=[O,N:2]>>[C:1][*:2]')
rxn
[Chem.MolToSmiles(x,1) for x in rxn.RunReactants((Chem.MolFromSmiles('CC=O'),))[0]]
rxn = AllChem.ReactionFromSmarts('[C:1]~[O,N:2]>>*[C:1]~[*:2]')
rxn
[Chem.MolToSmiles(x,1) for x in rxn.RunReactants((Chem.MolFromSmiles('C#N'),))[0]]
rxn = AllChem.ReactionFromSmarts("([C:1]=[C;H2].[C:2]=[C;H2])>>[*:1]=[*:2]")
rxn
m1 = Chem.MolFromSmiles('C=CCOCC=C')
m1
ps = rxn.RunReactants((m1,))
ps[0][0]
from rdkit.Chem import rdFMCS
def align_bundle_coords(bndl):
ps = rdFMCS.MCSParameters()
for m in bndl:
Chem.SanitizeMol(m)
mcs = rdFMCS.FindMCS(bndl,completeRingsOnly=True)
q = Chem.MolFromSmarts(mcs.smartsString)
rdDepictor.Compute2DCoords(q)
for m in bndl:
rdDepictor.GenerateDepictionMatching2DStructure(m,q)
pv1 = Chem.MolFromMolBlock('''
Mrv2007 06232015292D
0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 9 8 0 0 0
M V30 BEGIN ATOM
M V30 1 C -1.7083 2.415 0 0
M V30 2 C -3.042 1.645 0 0
M V30 3 C -3.042 0.105 0 0
M V30 4 N -1.7083 -0.665 0 0
M V30 5 C -0.3747 0.105 0 0
M V30 6 C -0.3747 1.645 0 0
M V30 7 * -0.8192 1.3883 0 0
M V30 8 O -0.8192 3.6983 0 0
M V30 9 C 0.5145 4.4683 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 2 2 3
M V30 3 1 3 4
M V30 4 2 4 5
M V30 5 1 5 6
M V30 6 2 1 6
M V30 7 1 7 8 ENDPTS=(3 1 5 6) ATTACH=ANY
M V30 8 1 8 9
M V30 END BOND
M V30 END CTAB
M END''')
pv1
pv1_bundle = rdMolEnumerator.Enumerate(pv1)
align_bundle_coords(pv1_bundle)
Draw.MolsToGridImage(pv1_bundle)
from rdkit.Chem import rdmolfiles as rdfiles
lib_alcohols = rdfiles.SmilesMolSupplier('./molecule_enumerations/alcohol.smi') #Read input file with all reactants
lib_alkynes = rdfiles.SmilesMolSupplier('./molecule_enumerations/alkynes.smi')
reaction_library = pd.read_csv('./molecule_enumerations/rxn_definitions.txt', sep='\t', index_col = 'ReactionName')
reaction_library
def unimolecular(molecule, reaction_smirk):
rxn = AllChem.ReactionFromSmarts(reaction_smirk)
product = rxn.RunReactants([molecule,])
final_smiles_list = []
try:
for i in range(len(product)):
final_smiles = Chem.MolToSmiles(product[i][0])
final_smiles_list.append(final_smiles)
except:
pass
return final_smiles_list
Draw.MolsToGridImage(lib_alcohols)
reaction_library.ReactionSMARTS['Primary alc ox']
unimolecular(lib_alcohols[0], reaction_library.ReactionSMARTS['Primary alc ox'])
products = []
for alcohol_mol in lib_alcohols:
prod = unimolecular(alcohol_mol, reaction_library.ReactionSMARTS['Primary alc ox'])
for mol in prod:
product = Chem.MolFromSmiles(mol)
products.append(product)
align_bundle_coords(products)
Draw.MolsToGridImage(products)
def bimolecular_rxn(mol1, mol2, reaction_smirk):
rxn = AllChem.ReactionFromSmarts(reaction_smirk)
product = rxn.RunReactants([mol1, mol2])
final_smiles_list = []
try:
for i in range(len(product)):
final_smiles = Chem.MolToSmiles(product[i][0])
final_smiles_list.append(final_smiles)
except:
pass
return final_smiles_list
reaction_smirks = reaction_library.ReactionSMARTS["4-Click"] #Define the reaction type from the reaction library
reaction_smirks
product_bimolecular = [] #List of enumerated molecules
Core = Chem.MolFromSmiles('O[C@@H]1[C@@H](O)[C@H](OCC2=CC=CC=C2)[C@H](C[C@H]1N=[N+]=[N-])C([O-])=O') #Define the molecular core
Core
for i in lib_alkynes: #Read the .smi-file of reagents to be used for enumeration
resulting_smiles = bimolecular_rxn(Core, i, reaction_smirks)
for i in resulting_smiles:
prod1 = Chem.MolFromSmiles(i)
product_bimolecular.append(prod1)
align_bundle_coords(product_bimolecular)
Draw.MolsToGridImage(product_bimolecular)