Auto3D to make optimize tautomers and 3D conformers

Playthrough on using Auto3D.
Published

January 16, 2023

import os
import sys
import copy 
root = os.path.dirname(os.path.dirname(os.path.abspath("__file__")))
sys.path.append(root)

View the test molecules

import pandas as pd
import numpy as np 
from rdkit.Chem import PandasTools
import rdkit
from rdkit import Chem #This gives us most of RDkits's functionality
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole #Needed to show molecules
IPythonConsole.ipython_useSVG=True  #SVG's tend to look nicer than the png counterparts
print(rdkit.__version__)

# Mute all errors except critical
Chem.WrapLogs()
lg = rdkit.RDLogger.logger() 
lg.setLevel(rdkit.RDLogger.CRITICAL)
2021.09.4
test_smi = pd.read_csv('./tauto.smi', sep=' ', header=None, names=['SMILES','Name'])
test_smi.sample(2)
SMILES Name
1 C1(=CC(=NC(=C1)Cl)Cl)O Cl-pyridone
4 N=C(C1=C(OCC2=C(F)C=CC=C2)C=CC=C1)O boo
PandasTools.AddMoleculeColumnToFrame(test_smi, smilesCol='SMILES')
PandasTools.FrameToGridImage(test_smi, legendsCol='Name', molsPerRow=4)

Generate low-energy 3D structures with Auto3D

#Always ensure that you have the latest version
import Auto3D
from Auto3D.auto3D import options, main
print(Auto3D.__version__)
2.0
options
<function Auto3D.auto3D.options(path, k=False, window=False, verbose=False, job_name='', enumerate_tautomer=False, tauto_engine='rdkit', pKaNorm=True, isomer_engine='rdkit', enumerate_isomer=True, mode_oe='classic', mpi_np=4, max_confs=None, use_gpu=True, gpu_idx=0, capacity=42, optimizing_engine='AIMNET', patience=1000, opt_steps=5000, convergence_threshold=0.003, threshold=0.3, memory=None, batchsize_atoms=1024)>

import Auto3D from Auto3D.auto3D import options, main

path = ‘./tauto.smi’ args = options(path, window=5, #Outputs the structures whose energies are within a window (kcal/mol) enumerate_tautomer = True, #For enumerating tautomers enumerate_isomer = True, isomer_engine=‘rdkit’, optimizing_engine=‘ANI2xt’, use_gpu=False) #args specify the parameters for Auto3D out = main(args) #main acceps the parameters and run Auto3D print(out)

out = '/lrlhps/users/l017301/SHARE/PROJECTS/SOFTWARE/Auto3D/Jan14/20230116-172252-638797_tauto/tauto_out.sdf'
out_folder = os.path.dirname(out)

Read the SDF files

import mols2grid
from rdkit import Chem
from rdkit.Chem import AllChem
sdf_df = PandasTools.LoadSDF(out)
sdf_df
ID E_tot fmax Converged E_rel(kcal/mol) ROMol
0 Cl-pyridone@taut1 -1242.4485217285633 0.0028711010236293077 True 0.0 <img data-content="rdkit/molecule" src="" alt="Mol"/>
1 Cl-pyridone@taut2 -1242.4576208991289 0.0029037443455308676 True 0.0 <img data-content="rdkit/molecule" src="" alt="Mol"/>
2 benzene-fused@taut1 -646.5230541429044 0.0028541951905936003 True 0.0 <img data-content="rdkit/molecule" src="" alt="Mol"/>
3 benzene-fused@taut2 -646.5449096909285 0.00297327502630651 True 0.0 <img data-content="rdkit/molecule" src="" alt="Mol"/>
4 benzene-fused@taut3 -646.5199764570714 0.0022692130878567696 True 0.0 <img data-content="rdkit/molecule" src="" alt="Mol"/>
... ... ... ... ... ... ...
228 sildenafil@taut9 -1883.1668644994259 0.0027246379759162664 True 2.6457204767372136 <img data-content="rdkit/molecule" src="" alt="Mol"/>
229 sildenafil@taut9 -1883.166294679022 0.0028603156097233295 True 3.0032881786646404 <img data-content="rdkit/molecule" src="" alt="Mol"/>
230 sildenafil@taut9 -1883.1660918444156 0.002793555613607168 True 3.130568815727958 <img data-content="rdkit/molecule" src="" alt="Mol"/>
231 sildenafil@taut9 -1883.1657381802559 0.002911796560510993 True 3.3524964267405757 <img data-content="rdkit/molecule" src="" alt="Mol"/>
232 sildenafil@taut9 -1883.1640510409832 0.0027946573682129383 True 4.411192304313834 <img data-content="rdkit/molecule" src="" alt="Mol"/>

233 rows × 6 columns

sdf_df = sdf_df.sort_values('E_tot', ascending=False)
sdf_df = sdf_df.drop_duplicates(subset=['ID'])
sdf_df['parent_id'] = [x.split('@')[0] for x in sdf_df['ID']]
sdf_df['tauto_id'] = [x.split('@')[1] for x in sdf_df['ID']]
sdf_df['SMILES'] = [Chem.MolToSmiles(x) for x in sdf_df['ROMol']]
sdf_df['E_tot'] = sdf_df['E_tot'].astype(float)
sdf_df.columns
Index(['ID', 'E_tot', 'fmax', 'Converged', 'E_rel(kcal/mol)', 'ROMol',
       'parent_id', 'tauto_id', 'SMILES'],
      dtype='object')
sdf_format = sdf_df[['parent_id','tauto_id', 'ID', 'SMILES', 'E_tot', 'fmax', 'Converged', 'E_rel(kcal/mol)']]
# convert Hatrees to kcal/mol http://wild.life.nctu.edu.tw/class/common/energy-unit-conv-table.html
E_rel = []
for parent_id in set(list(sdf_format['parent_id'])):
  sdf_format_per_parent = sdf_format.loc[ sdf_format['parent_id'] == parent_id ]
  sdf_format_per_parent.loc[sdf_format_per_parent.index, 'E_rel_tauto(kcal/mol)'] = (sdf_format_per_parent['E_tot'] - min(sdf_format_per_parent['E_tot'])) * 627.5095
  E_rel.append(sdf_format_per_parent)
sdf_format = pd.concat(E_rel)
/node/scratch/106404095.1.all.normal.q/ipykernel_28232/475714180.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sdf_format_per_parent.loc[sdf_format_per_parent.index, 'E_rel_tauto(kcal/mol)'] = (sdf_format_per_parent['E_tot'] - min(sdf_format_per_parent['E_tot'])) * 627.5095
sdf_format
parent_id tauto_id ID SMILES E_tot fmax Converged E_rel(kcal/mol) E_rel_tauto(kcal/mol)
167 sildenafil taut26 sildenafil@taut26 CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)... -1883.237851 0.0027714346069842577 True 0.0 0.000000
174 sildenafil taut27 sildenafil@taut27 CCCc1nn(C)c2c(=O)nc(-c3cc(S(=O)(=O)N4CCN(C)CC4... -1883.217860 0.0029108182061463594 True 0.0 12.544642
183 sildenafil taut28 sildenafil@taut28 CCCc1nn(C)c2c(O)nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)... -1883.211783 0.002911168849095702 True 0.0 16.358181
155 sildenafil taut24 sildenafil@taut24 CC/C=C1/NN(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3... -1883.198682 0.002975363051518798 True 0.0 24.578853
119 sildenafil taut20 sildenafil@taut20 C/C=C/c1nn(C)c2c1N[C@@H](c1cc(S(=O)(=O)N3CCN(C... -1883.195765 0.002867447677999735 True 0.0 26.409780
210 sildenafil taut5 sildenafil@taut5 C/C=C/C1=C2NC(c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OC... -1883.185570 0.0029683925677090883 True 0.0 32.806838
91 sildenafil taut17 sildenafil@taut17 C/C=C/c1nn(C)c2c1N=C(c1cc(S(=O)(=O)N3CCN(C)CC3... -1883.185264 0.002987172920256853 True 0.0 32.998937
151 sildenafil taut23 sildenafil@taut23 CC/C=C1/NN(C)c2c1[nH]c(-c1cc(S(=O)(=O)N3CCN(C)... -1883.183689 0.0029540995601564646 True 0.0 33.987148
96 sildenafil taut18 sildenafil@taut18 C/C=C/c1nn(C)c2c1NC(c1cc(S(=O)(=O)N3CCN(C)CC3)... -1883.178366 0.0029339187312871218 True 0.0 37.327788
81 sildenafil taut15 sildenafil@taut15 C/C=C/[C@@H]1NN(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(... -1883.176364 0.0029547689482569695 True 0.0 38.583651
143 sildenafil taut22 sildenafil@taut22 CC/C=C1/NN(C)c2c(O)nc(-c3cc(S(=O)(=O)N4CCN(C)C... -1883.173366 0.0019200812093913555 True 0.0 40.464996
101 sildenafil taut19 sildenafil@taut19 C/C=C/c1nn(C)c2c1N[C@@H](c1cc(S(=O)(=O)N3CCN(C... -1883.172031 0.00290022068656981 True 0.0 41.302550
227 sildenafil taut9 sildenafil@taut9 C/C=C/C1=NN(C)[C@@H]2C(=O)NC(c3cc(S(=O)(=O)N4C... -1883.171081 0.0029343736823648214 True 0.0 41.899119
160 sildenafil taut25 sildenafil@taut25 CCCC1=NN(C)[C@@H]2C(=O)N=C(c3cc(S(=O)(=O)N4CCN... -1883.169082 0.0029732126276940107 True 0.0 43.153355
187 sildenafil taut3 sildenafil@taut3 C/C=C/C1=C2N=C(c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3O... -1883.166379 0.002880740910768509 True 0.0 44.849577
111 sildenafil taut2 sildenafil@taut2 C/C=C/C1=C2N=C(c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3O... -1883.164880 0.0029965881258249283 True 0.0 45.789950
222 sildenafil taut8 sildenafil@taut8 C/C=C/C1=NN(C)[C@@H]2C(=O)N=C(c3cc(S(=O)(=O)N4... -1883.164851 0.0029544399585574865 True 0.0 45.808576
213 sildenafil taut6 sildenafil@taut6 C/C=C\C1=NN(C)C2=C(O)N=C(c3cc(S(=O)(=O)N4CCN(C... -1883.164151 0.0029395974706858397 True 0.0 46.247307
88 sildenafil taut16 sildenafil@taut16 C/C=C/c1nn(C)c2c1=N[C@@H](c1cc(S(=O)(=O)N3CCN(... -1883.159181 0.0029624311719089746 True 0.0 49.366394
72 sildenafil taut14 sildenafil@taut14 C/C=C\[C@@H]1NN(C)c2c1[nH]c(-c1cc(S(=O)(=O)N3C... -1883.159030 0.002975861541926861 True 0.0 49.460966
55 sildenafil taut13 sildenafil@taut13 C/C=C\[C@@H]1NN(C)c2c(O)nc(-c3cc(S(=O)(=O)N4CC... -1883.148282 0.002590285148471594 True 0.0 56.205456
193 sildenafil taut4 sildenafil@taut4 C/C=C/C1=C2NC(c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OC... -1883.144197 0.002940340666100383 True 0.0 58.768723
217 sildenafil taut7 sildenafil@taut7 C/C=C/C1=NN(C)C2=C(O)NC(c3cc(S(=O)(=O)N4CCN(C)... -1883.144103 0.0029915182385593653 True 0.0 58.827650
21 sildenafil taut10 sildenafil@taut10 C/C=C/C1=NN(C)[C@@H]2C(O)=NC(c3cc(S(=O)(=O)N4C... -1883.142340 0.0029348309617489576 True 0.0 59.933997
17 sildenafil taut1 sildenafil@taut1 C/C=C/C1=C2N=C(c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3O... -1883.142246 0.0026598710101097822 True 0.0 59.993093
29 sildenafil taut11 sildenafil@taut11 C/C=C/C1=NN(C)[C@@H]2C1=NC(c1cc(S(=O)(=O)N3CCN... -1883.141071 0.002974058734253049 True 0.0 60.730651
129 sildenafil taut21 sildenafil@taut21 CC/C=C1/NN(C)[C@@H]2C(=O)N=C(c3cc(S(=O)(=O)N4C... -1883.140111 0.0025039315223693848 True 0.0 61.333093
37 sildenafil taut12 sildenafil@taut12 C/C=C\[C@@H]1NN(C)[C@H]2C(=O)N=C(c3cc(S(=O)(=O... -1883.124047 0.002953974064439535 True 0.0 71.413043
9 boo taut2 boo@taut2 NC(=O)c1ccccc1OCc1ccccc1F -845.393300 0.002969717374071479 True 0.0 0.000000
5 boo taut1 boo@taut1 N=C(O)c1ccccc1OCc1ccccc1F -845.367714 0.002988976426422596 True 0.0 16.055548
15 pyridone taut1 pyridone@taut1 O=c1cc[nH]cc1 -323.358696 0.002931158524006605 True 0.0 0.000000
16 pyridone taut2 pyridone@taut2 Oc1ccncc1 -323.357830 0.0027759429067373276 True 0.0 0.543535
3 benzene-fused taut2 benzene-fused@taut2 O=c1ccnc2ccc3ccc[nH]c3c12 -646.544910 0.00297327502630651 True 0.0 0.000000
2 benzene-fused taut1 benzene-fused@taut1 O=c1cc[nH]c2ccc3cccnc3c12 -646.523054 0.0028541951905936003 True 0.0 13.714564
4 benzene-fused taut3 benzene-fused@taut3 Oc1ccnc2ccc3cccnc3c12 -646.519976 0.0022692130878567696 True 0.0 15.645841
1 Cl-pyridone taut2 Cl-pyridone@taut2 Oc1cc(Cl)nc(Cl)c1 -1242.457621 0.0029037443455308676 True 0.0 0.000000
0 Cl-pyridone taut1 Cl-pyridone@taut1 O=c1cc(Cl)[nH]c(Cl)c1 -1242.448522 0.0028711010236293077 True 0.0 5.709816
mols2grid.display(sdf_format, 
                  # set what's displayed on the grid
                  subset=["parent_id", "img", "tauto_id","E_rel_tauto(kcal/mol)"],
                  # set what's displayed on the hover tooltip
                  tooltip=["parent_id", "E_tot", "fmax", "E_rel_tauto(kcal/mol)"],
                  transform={"E_rel_tauto(kcal/mol)": lambda x: r"del_E_tauto: {0:0.3f} kcal/mol".format(x)},
                  size=(250,250))

Saving and viewing the SD files

Here I am looking at the 3D conformers of the original and tautomers

inf = open(os.path.join(out_folder, 'job1','smi_taut_3d.sdf'),'rb')
with Chem.ForwardSDMolSupplier(inf) as fsuppl:
    ms = [x for x in fsuppl if x is not None]
## Combine all the conformers for a given molecule in 1 molecule object 
mol_dict = []
get_og_name = [x.GetProp('ID').split('@')[0] for x in ms]
for index, row in test_smi.iterrows():
  item_index = []
  for entry_index, og_name in enumerate(get_og_name):
    if str(row['Name']) == og_name:
      item_index.append(entry_index)
  list_sdf = [ms[i] for i in item_index]
  sdf_dict = {}
  sdf_dict['parent'] = row['Name']
  sdf_dict['conformers'] = list_sdf 
  mol_dict.append(sdf_dict)
def to_sdf(mol_dict):
  parent_id = mol_dict['parent']
  w = Chem.SDWriter(f'{out_folder}/conformers_{parent_id}.sdf')
  sdfs = mol_dict['conformers']
  for entries in sdfs:
    w.write(entries)
  w.close()

def append_conformers_to_mol(mol_dict):
  parent_id = mol_dict['parent']
  sdfs = mol_dict['conformers']
  ref = copy.deepcopy(sdfs[0])
  for mol in sdfs:
    conf_mol = mol.GetConformer()
    mol_props = mol.GetPropsAsDict()
    for key, value in mol_props.items():
      conf_mol.SetProp(str(key), str(value))
    ref.AddConformer(conf_mol, assignId=True)
  
  for name in ref.GetPropNames():
    ref.ClearProp(name)
  
  ref.SetProp('_Name',str(parent_id))
  Chem.rdMolAlign.AlignMolConformers(ref)
  return ref 
for mol_list in mol_dict:
  print(mol_list['parent'], len(mol_list['conformers']))
pyridone 2
Cl-pyridone 2
benzene-fused 3
sildenafil 216
boo 10
mol_dict[1]
{'parent': 'Cl-pyridone',
 'conformers': [<rdkit.Chem.rdchem.Mol at 0x2b29365373a0>,
  <rdkit.Chem.rdchem.Mol at 0x2b2936537030>]}
to_sdf(mol_dict[1])
ref = append_conformers_to_mol(mol_dict[-1])
# http://rdkit.blogspot.com/2016/07/using-ipywidgets-and-py3dmol-to-browse.html
import py3Dmol
from rdkit import Chem
from rdkit.Chem import AllChem
from ipywidgets import interact, interactive, fixed
ref.GetNumConformers()
11

What I want to do is generate a set of conformers for a molecule and scroll through them interactively. Here’s some code for doing that:

# https://birdlet.github.io/2019/10/02/py3dmol_example/
# http://rdkit.blogspot.com/2016/07/using-ipywidgets-and-py3dmol-to-browse.html

import py3Dmol

def MolTo3DView(mol, confId, size=(400, 400), style="stick", surface=False, opacity=0.5):
    """Draw molecule in 3D
    
    Args:
    ----
        mol: rdMol, molecule to show
        size: tuple(int, int), canvas size
        style: str, type of drawing molecule
               style can be 'line', 'stick', 'sphere', 'carton'
        surface, bool, display SAS
        opacity, float, opacity of surface, range 0.0-1.0
    Return:
    ----
        viewer: py3Dmol.view, a class for constructing embedded 3Dmol.js views in ipython notebooks.
    """
    assert style in ('line', 'stick', 'sphere', 'carton')
    mblock = Chem.MolToMolBlock(mol, confId=confId)
    viewer = py3Dmol.view(width=size[0], height=size[1])
    viewer.addModel(mblock, 'sdf')
    viewer.setStyle({style:{}})
    if surface:
        viewer.addSurface(py3Dmol.SAS, {'opacity': opacity})
    viewer.zoomTo()
    return viewer
ref.GetProp('_Name')
'boo'
for conf in ref.GetConformers():
    print(conf.GetId())
0
1
2
3
4
5
6
7
8
9
10
from ipywidgets import interact,fixed,IntSlider
import ipywidgets

from rdkit import Chem
from rdkit.Chem import AllChem

def conf_viewer(mol, confId=-1):
    return MolTo3DView(mol, confId).show()

interact(conf_viewer, mol=fixed(ref), confId=ipywidgets.IntSlider(min=0,max=ref.GetNumConformers()-1, step=1))

You appear to be running in JupyterLab (or JavaScript failed to load for some other reason). You need to install the 3dmol extension:
jupyter labextension install jupyterlab_3dmol

<function __main__.conf_viewer(mol, confId=-1)>
sdf_mol_info = sdf_format.loc[ sdf_format['parent_id'] == ref.GetProp('_Name')]
mols2grid.display(sdf_mol_info, 
                  # set what's displayed on the grid
                  subset=["parent_id", "img", "tauto_id","E_rel_tauto(kcal/mol)"],
                  # set what's displayed on the hover tooltip
                  tooltip=["parent_id", "E_tot", "fmax", "E_rel_tauto(kcal/mol)"],
                  transform={"E_rel_tauto(kcal/mol)": lambda x: f'del_E_tauto: {round(x, 3)} kcal/mol'},
                  size=(250,250))