In [1]:
from __future__ import print_function
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import IPythonConsole
import copy
import numpy as np
In [2]:
import pandas as pd
# Relative path to the CSV of heterocycle transformations; the loader cell reads
# its 'SMARTS' and 'CONVERT_TO' columns.
# NOTE(review): pandas is imported here but not used in any visible cell.
csvfile = './data/heterocycle_reactions.csv'

Read all the SMARTS patterns from the CSV file and compile them into RDKit reactions

In [3]:
import csv

# Compile one RDKit reaction per (SMARTS, product template) pair found in the CSV.
# The file is opened in a `with` block so the handle is closed as soon as parsing
# finishes instead of being left open for the lifetime of the kernel.
REACTIONS = []
with open(csvfile) as csv_handle:
    smarts_reader = csv.DictReader(csv_handle)
    for row in smarts_reader:
        smarts = row['SMARTS']
        if not smarts:
            # Rows with an empty SMARTS column carry no reactant pattern; skip them.
            continue

        # CONVERT_TO is a comma-separated list of product templates; each one
        # becomes its own "reactant>>product" reaction.
        for product in row['CONVERT_TO'].split(','):
            reaction = smarts + '>>' + product
            REACTIONS.append(AllChem.ReactionFromSmarts(reaction))
In [4]:
def get_unique_products(mol):
    """Yield the distinct single-step products of applying every reaction to `mol`.

    Each reaction in the module-level REACTIONS list is run with `mol` as its
    only reactant. For every product set, only the first product (index 0) is
    considered. Products are de-duplicated by their isomeric SMILES, which is
    computed before sanitization; a product seen for the first time is
    sanitized in place with Chem.SanitizeMol and then yielded.

    Parameters:
        mol: RDKit molecule used as the sole reactant.

    Yields:
        Sanitized RDKit product molecules, one per unique isomeric SMILES.
    """
    emitted_smiles = set()
    for reaction in REACTIONS:
        for product_set in reaction.RunReactants((mol,)):
            product = product_set[0]
            smiles_key = Chem.MolToSmiles(product, isomericSmiles=True)
            if smiles_key not in emitted_smiles:
                emitted_smiles.add(smiles_key)
                # Sanitize only products that are actually handed to the caller.
                Chem.SanitizeMol(product)
                yield product
In [5]:
def enumerate_heterocycles(mol):
    """Yield every molecule reachable from `mol` by repeated reaction steps.

    A work stack is seeded with `mol`. Each popped molecule is expanded with
    get_unique_products(); any product whose SMILES has not been recorded yet
    is pushed back on the stack, recorded, and yielded. The loop ends when the
    stack is empty.

    The SMILES of the input molecule is not recorded up front, so `mol` itself
    is yielded only if some sequence of reactions regenerates it.

    Parameters:
        mol: RDKit molecule to start the enumeration from.

    Yields:
        RDKit molecules, each with a SMILES not yielded before.
    """
    pending = [mol]
    visited_smiles = set()
    while pending:
        current = pending.pop()
        for candidate in get_unique_products(current):
            candidate_smiles = Chem.MolToSmiles(candidate)
            if candidate_smiles not in visited_smiles:
                # New structure: queue it for further expansion before yielding.
                pending.append(candidate)
                visited_smiles.add(candidate_smiles)
                yield candidate
In [6]:
# Small demonstration on benzene: enumerate every variant and draw them all
# in a grid, ten structures per row.
bnzn = Chem.MolFromSmiles('c1ccccc1')
Draw.MolsToGridImage(list(enumerate_heterocycles(bnzn)), molsPerRow=10)
Out[6]:
In [7]:
# Capivasertib as an isomeric SMILES. The stereocentre must be written as a
# bracket atom ([C@H]); a mangled token here makes the string unparsable and
# MolFromSmiles returns None, which breaks every later cell.
# NOTE(review): [C@H] is intended to encode the (S) stereocentre — confirm
# against a reference record for capivasertib.
capivasertib = Chem.MolFromSmiles('c1cc(ccc1[C@H](CCO)NC(=O)C2(CCN(CC2)c3c4cc[nH]c4ncn3)N)Cl')
# Fail early with a clear message instead of an AttributeError further down.
assert capivasertib is not None, 'capivasertib SMILES failed to parse'
capivasertib
Out[7]:
In [8]:
# Work on a deep copy so the unprotected molecule stays untouched for comparison.
protected_capivasertib = copy.deepcopy(capivasertib)
atoms = protected_capivasertib.GetAtoms()

# Find the benzene ring(s); each match is a tuple of atom indices.
phenyl = Chem.MolFromSmiles('c1ccccc1')
phenyl_matches = protected_capivasertib.GetSubstructMatches(phenyl)

# Flatten all match tuples into one 1-D array of atom indices. The explicit
# integer dtype keeps this an index array even when nothing matches.
matches = np.array(phenyl_matches, dtype=int).flatten()
In [9]:
# Tag every atom that is not part of a phenyl match with the '_protected'
# property; atoms inside the matched ring(s) are left untagged.
for atom in atoms:
    if atom.GetIdx() in matches:
        continue
    atom.SetProp('_protected', '1')
In [10]:
# Enumerate from the unmodified molecule; list() drains the generator completely.
enume1 = list(enumerate_heterocycles(capivasertib))
In [11]:
# Same enumeration on the copy whose non-phenyl atoms carry '_protected'
# (RDKit does not match atoms with that property against reaction templates).
enume2 = list(enumerate_heterocycles(protected_capivasertib))
In [12]:
# Compare how many structures were enumerated without vs. with atom protection.
print(len(enume1), len(enume2))
2592 9
In [13]:
# Show the first ten structures from the unprotected enumeration, five per row.
Draw.MolsToGridImage(enume1[:10], molsPerRow=5)
Out[13]:
In [14]:
# Show up to ten structures from the protected enumeration, five per row.
Draw.MolsToGridImage(enume2[:10], molsPerRow=5)
Out[14]:
In [ ]: