from __future__ import print_function
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import IPythonConsole
import copy
import numpy as np
import pandas as pd
# Location of the CSV file holding the heterocycle transformation patterns
# (read below through its 'SMARTS' and 'CONVERT_TO' columns).
csvfile = './data/heterocycle_reactions.csv'
# Read all the patterns into RDKit reactions.
import csv

# Build one RDKit reaction per (SMARTS pattern, replacement) pair in the CSV.
# The file is opened in a ``with`` block so the handle is closed as soon as
# all rows have been consumed, instead of staying open for the whole session.
REACTIONS = []
with open(csvfile) as csv_handle:
    smarts_reader = csv.DictReader(csv_handle)
    for row in smarts_reader:
        smarts = row['SMARTS']
        if not smarts:
            # Rows without a pattern describe no transformation.
            continue
        # CONVERT_TO is a comma-separated list of replacements; each entry
        # becomes its own "pattern>>replacement" reaction.
        for product in row['CONVERT_TO'].split(','):
            reaction = smarts + '>>' + product
            REACTIONS.append(AllChem.ReactionFromSmarts(reaction))
def get_unique_products(mol):
    """Yield the distinct single-step products of running REACTIONS on *mol*.

    Every reaction in the module-level ``REACTIONS`` list is run with ``mol``
    as its only reactant. Only the first product of each outcome is used.
    Outcomes are de-duplicated by the isomeric SMILES computed *before*
    sanitization; ``Chem.SanitizeMol`` is then called on each new product
    right before it is yielded.
    """
    emitted = set()
    for rxn in REACTIONS:
        for outcome in rxn.RunReactants((mol,)):
            candidate = outcome[0]
            key = Chem.MolToSmiles(candidate, isomericSmiles=True)
            if key not in emitted:
                emitted.add(key)
                Chem.SanitizeMol(candidate)
                yield candidate
def enumerate_heterocycles(mol):
    """Yield each new molecule reachable from *mol* by repeated transformation.

    A work stack is seeded with ``mol``. Each molecule popped from the stack
    is expanded with ``get_unique_products``; a product whose SMILES has not
    been seen yet is recorded, pushed back onto the stack for further
    expansion, and yielded. ``mol`` itself is neither yielded up front nor
    pre-registered in the seen set.
    """
    seen = set()
    pending = [mol]
    while pending:
        current = pending.pop()
        for candidate in get_unique_products(current):
            key = Chem.MolToSmiles(candidate)
            if key not in seen:
                seen.add(key)
                pending.append(candidate)
                yield candidate
# Enumerate the products generated from benzene and draw them, ten per row.
bnzn = Chem.MolFromSmiles('c1ccccc1')
bnzn_heterocycles = list(enumerate_heterocycles(bnzn))
Draw.MolsToGridImage(bnzn_heterocycles, molsPerRow=10)
# Capivasertib is the unconstrained starting molecule.
capivasertib = Chem.MolFromSmiles('c1cc(ccc1[C@H](CCO)NC(=O)C2(CCN(CC2)c3c4cc[nH]c4ncn3)N)Cl')
capivasertib

# Work on a copy so the '_protected' marks never touch the original molecule.
protected_capivasertib = copy.deepcopy(capivasertib)
atoms = protected_capivasertib.GetAtoms()

# Collect the indices of all atoms taking part in any match of the phenyl query.
phenyl = Chem.MolFromSmiles('c1ccccc1')
phenyl_matches = protected_capivasertib.GetSubstructMatches(phenyl)
matches = np.array(phenyl_matches).flatten()

# Tag every atom outside those matches with '_protected'.
# NOTE: per the RDKit documentation, reactions leave atoms carrying this
# property unmatched, which restricts the enumeration to the phenyl ring.
for atom in atoms:
    if atom.GetIdx() in matches:
        continue
    atom.SetProp('_protected', '1')

# Enumerate from both the unprotected and the protected molecule and compare
# how many products each one generates.
enume1 = list(enumerate_heterocycles(capivasertib))
enume2 = list(enumerate_heterocycles(protected_capivasertib))
print(len(enume1), len(enume2))
# Output of the print above: 2592 9
# Draw up to the first ten products of each enumeration, five per row:
# first the results from unprotected capivasertib (enume1), then the results
# from the copy whose non-phenyl atoms were marked '_protected' (enume2).
# Each call is kept as a bare expression; presumably these were the last
# statements of separate notebook cells so the image is displayed.
Draw.MolsToGridImage(enume1[:10], molsPerRow=5)
Draw.MolsToGridImage(enume2[:10], molsPerRow=5)