"""Flattened notebook session: functional-group filtering and reaction
enumeration with RDKit.

The script loads a random sample of SMILES, buckets the molecules by the
functional groups in RDKit's functional-group hierarchy, and then couples
aromatic Br/I compounds with secondary amines, first with a reaction SMARTS
and then with an RXN block whose query atoms are annotated with
functional-group names.

Bare expressions such as ``ms[0]`` or ``usmis`` are kept from the notebook,
where they display a value when they end a cell; run as a plain script they
have no effect.
"""
import gzip
import random
from collections import namedtuple

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import FunctionalGroups
# Not referenced below; presumably imported for its side effect of enabling
# inline molecule rendering in the notebook -- keep it.
from rdkit.Chem.Draw import IPythonConsole  # noqa: F401

# ---------------------------------------------------------------------------
# Load a reproducible random sample of the input molecules.
# ---------------------------------------------------------------------------
# Text mode ('rt') yields str lines, so the join below also works on
# Python 3; the context manager closes the file handle.
with gzip.open('data/zbb.smi.gz', 'rt') as inf:
    inlines = inf.readlines()
random.seed(42)
random.shuffle(inlines)
indata = '\n'.join(inlines[:10000])
suppl = Chem.SmilesMolSupplier()
suppl.SetData(indata)
# Drop the entries the supplier could not turn into a molecule.
ms = [x for x in suppl if x is not None]
ms[0]

# ---------------------------------------------------------------------------
# Functional-group hierarchy.
# ---------------------------------------------------------------------------
fgs = FunctionalGroups.BuildFuncGroupHierarchy()
fgs
[x.name for x in fgs]
[x.label for x in fgs]
[x.smarts for x in fgs]
[x.label for x in fgs[1].children]

nt = namedtuple('pattern', 'smarts mol')


def flattenFgs(fgs, res):
    """Recursively copy a functional-group hierarchy into a flat dict.

    Args:
        fgs: iterable of hierarchy nodes; each node provides ``label``,
            ``smarts``, ``pattern`` and ``children``. May be empty or None.
        res: dict updated in place, mapping each node's label to an
            ``nt(smarts, mol)`` tuple built from the node's ``smarts`` and
            ``pattern``.

    Returns:
        None; the result is accumulated in ``res``.
    """
    if not fgs:
        return
    for x in fgs:
        res[x.label] = nt(x.smarts, x.pattern)
        flattenFgs(x.children, res)


allFgDefs = {}
flattenFgs(fgs, allFgDefs)
allFgNames = sorted(allFgDefs.keys())
allFgNames

# Bucket the sampled molecules by every functional group they match.
allFgs = {}
for fgn in allFgNames:
    patt = allFgDefs[fgn]
    allFgs[fgn] = [m for m in ms if m.HasSubstructMatch(patt.mol)]
    print('%s: Found %d ' % (fgn, len(allFgs[fgn])))

# ---------------------------------------------------------------------------
# Reaction defined from SMARTS: aromatic Br/I + secondary amine.
# ---------------------------------------------------------------------------
rxn = AllChem.ReactionFromSmarts(
    '[a:1]-[Br,I].[N;H1;D2;$(N(-[#6])-[#6]);!$(N-[!#6;!#1]);!$(N-C=[O,N,S]):2]>>[a:1]-[N:2]')
Draw.ReactionToImage(rxn)

halogens = allFgs['Halogen.NotFluorine.Aromatic']
amines = allFgs['Amine.Secondary']
# Legends are built from the same slice as the molecules so that both
# sequences always have the same length.
Draw.MolsToGridImage(halogens[:20], molsPerRow=4,
                     legends=[x.GetProp('_Name') for x in halogens[:20]])
Draw.MolsToGridImage(amines[:20], molsPerRow=4,
                     legends=[x.GetProp('_Name') for x in amines[:20]])

# Keep halides with exactly one non-fluorine halogen match and no amine.
halogens = [x for x in halogens
            if len(x.GetSubstructMatches(allFgDefs['Halogen.NotFluorine'].mol)) == 1]
halogens = [x for x in halogens
            if not x.HasSubstructMatch(allFgDefs['Amine'].mol)]
len(halogens)
Draw.MolsToGridImage(halogens[:20], molsPerRow=4,
                     legends=[x.GetProp('_Name') for x in halogens[:20]])

# Keep amines with exactly one amine match and no non-fluorine halogen.
amines = [x for x in amines
          if len(x.GetSubstructMatches(allFgDefs['Amine'].mol)) == 1]
amines = [x for x in amines
          if not x.HasSubstructMatch(allFgDefs['Halogen.NotFluorine'].mol)]
len(amines)
Draw.MolsToGridImage(amines[:20], molsPerRow=4,
                     legends=[x.GetProp('_Name') for x in amines[:20]])

products = AllChem.EnumerateLibraryFromReaction(rxn, (halogens, amines))
products
# next(...) replaces the Python-2-only .next() method.
next(products)
first20 = [next(products)[0] for _ in range(20)]
Draw.MolsToGridImage(first20, molsPerRow=4)
# SanitizeMol works in place, so a plain loop is used instead of building a
# throwaway list of return values.
for x in first20:
    Chem.SanitizeMol(x)
Draw.MolsToGridImage(first20, molsPerRow=4)
next20 = [next(products)[0] for _ in range(20)]
for x in next20:
    Chem.SanitizeMol(x)
Draw.MolsToGridImage(next20, molsPerRow=4)

# ---------------------------------------------------------------------------
# The same reaction from an RXN block with functional-group annotations.
# ---------------------------------------------------------------------------
# MDL RXN/mol blocks are fixed-column, so every line is written as its own
# literal: this keeps the column layout (and the trailing blanks that pad the
# "M  ALS" atom-list entries to four characters) visible and safe from
# whitespace-stripping editors.
# NOTE(review): the column spacing was reconstructed from the V2000 layout;
# confirm against the original notebook if it is available.
rxnData = (
    "$RXN\n"
    "\n"
    "  Marvin       092401121729\n"
    "\n"
    "  2  1\n"
    "$MOL\n"
    "\n"
    "  Mrv0541 09241217292D\n"
    "\n"
    "  7  7  0  0  0  0            999 V2000\n"
    "   -6.7915   -0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  1  0  0\n"
    "   -7.6165   -0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  2  0  0\n"
    "   -8.0290    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  3  0  0\n"
    "   -7.6165    0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  4  0  0\n"
    "   -6.7915    0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  5  0  0\n"
    "   -6.3790    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  6  0  0\n"
    "   -5.5540    0.0000    0.0000 *   0  0  0  0  0  0  0  0  0  0  0  0\n"
    "  1  2  4  0  0  0  0\n"
    "  2  3  4  0  0  0  0\n"
    "  3  4  4  0  0  0  0\n"
    "  4  5  4  0  0  0  0\n"
    "  5  6  4  0  0  0  0\n"
    "  6  7  1  0  0  0  0\n"
    "  6  1  4  0  0  0  0\n"
    "V    7 Halogen.NotFluorine.Aromatic\n"
    "M  ALS   7  2 F Br  I   \n"
    "M  END\n"
    "$MOL\n"
    "\n"
    "  Mrv0541 09241217292D\n"
    "\n"
    "  3  2  0  0  0  0            999 V2000\n"
    "   -2.3645   -0.2750    0.0000 N   0  0  0  0  0  0  0  0  0  7  0  0\n"
    "   -1.6500    0.1375    0.0000 C   0  0  0  0  0  0  0  0  0  8  0  0\n"
    "   -3.0790    0.1375    0.0000 C   0  0  0  0  0  0  0  0  0  9  0  0\n"
    "  2  1  1  0  0  0  0\n"
    "  1  3  1  0  0  0  0\n"
    "V    1 Amine.Secondary\n"
    "M  END\n"
    "$MOL\n"
    "\n"
    "  Mrv0541 09241217292D\n"
    "\n"
    "  9  9  0  0  0  0            999 V2000\n"
    "    4.4197   -0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  1  0  0\n"
    "    3.5947   -0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  2  0  0\n"
    "    3.1821    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  3  0  0\n"
    "    3.5947    0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  4  0  0\n"
    "    4.4197    0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  5  0  0\n"
    "    4.8322    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  6  0  0\n"
    "    5.6572    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  7  0  0\n"
    "    6.0697   -0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  8  0  0\n"
    "    6.0697    0.7145    0.0000 C   0  0  0  0  0  0  0  0  0  9  0  0\n"
    "  1  2  4  0  0  0  0\n"
    "  2  3  4  0  0  0  0\n"
    "  3  4  4  0  0  0  0\n"
    "  4  5  4  0  0  0  0\n"
    "  5  6  4  0  0  0  0\n"
    "  6  7  1  0  0  0  0\n"
    "  8  7  1  0  0  0  0\n"
    "  7  9  1  0  0  0  0\n"
    "  6  1  4  0  0  0  0\n"
    "M  END\n"
)
rxn = AllChem.ReactionFromRxnBlock(rxnData)
AllChem.Compute2DCoordsForReaction(rxn)
Draw.ReactionToImage(rxn)

m1 = Chem.MolFromSmiles('c1ccc(C)cc1Br')
m2 = Chem.MolFromSmiles('CCNCC')
ps = rxn.RunReactants((m1, m2,))
ps
# Collapse the product sets to the unique SMILES of their first product.
usmis = {Chem.MolToSmiles(p[0]) for p in ps}
usmis
Draw.MolsToGridImage([Chem.MolFromSmiles(x) for x in usmis])

m1 = Chem.MolFromSmiles('c1ccc(C)cc1Br')
m2 = Chem.MolFromSmiles('CCNCC(=O)NC')
ps = rxn.RunReactants((m1, m2,))
usmis = {Chem.MolToSmiles(p[0]) for p in ps}
usmis
Draw.MolsToGridImage([Chem.MolFromSmiles(x) for x in usmis])


# this should have some documentation and a lot more error checking... that's
# coming in the next RDKit release.
def PreprocessReaction(reaction, funcGroupFilename=None):
    """Attach functional-group queries to annotated reactant-template atoms.

    The reaction is initialized and validated. If validation reports no
    errors, every reactant-template atom carrying a ``molFileValue``
    property is examined: the property is read as a comma-separated,
    case-insensitive list of functional-group names. When every name is
    known, the matching pattern (or, for several names, one combined
    recursive-SMARTS pattern) is added to that atom as a recursive query.

    Args:
        reaction: the reaction to modify in place.
        funcGroupFilename: optional functional-group definition file, passed
            through to ``FunctionalGroups.BuildFuncGroupHierarchy``.

    Returns:
        A list with one tuple per reactant template. Each tuple holds
        ``(atomIdx, queryName)`` pairs for the annotated atoms, where
        ``queryName`` is the comma-joined canonical group names, or None if
        any of the requested names was not recognised. The list is empty
        when validation reports errors.

    Raises:
        ValueError: if the combined SMARTS for an atom cannot be parsed.
    """
    reaction._setImplicitPropertiesFlag(True)
    reaction.Initialize()
    nReactants = reaction.GetNumReactantTemplates()
    _nWarn, nError = reaction.Validate()
    reactantLabels = []
    if nError:
        return reactantLabels

    FunctionalGroups.BuildFuncGroupHierarchy(fileNm=funcGroupFilename)
    # Lower-cased name -> canonical name, for case-insensitive lookups.
    gpNms = {k.lower(): k for k in FunctionalGroups.groupDefns}

    for i in range(nReactants):
        m = reaction.GetReactantTemplate(i)
        gps = []
        for at in m.GetAtoms():
            if not at.HasProp('molFileValue'):
                continue
            atIdx = at.GetIdx()
            vals = at.GetProp('molFileValue').lower().split(',')
            if all(v in gpNms for v in vals):
                names = [gpNms[v] for v in vals]
                queries = [FunctionalGroups.groupDefns[nm] for nm in names]
                if len(queries) > 1:
                    # combine the individual queries into one SMARTS:
                    smas = ['$(%s)' % q.smarts for q in queries]
                    overallSmarts = '[%s]' % ','.join(smas)
                    pattern = Chem.MolFromSmarts(overallSmarts)
                    if not pattern:
                        raise ValueError(
                            'could not build query from combined SMARTS "%s"'
                            % overallSmarts)
                else:
                    pattern = queries[0].pattern
                Chem.AddRecursiveQuery(m, pattern, atIdx)
                queryName = ','.join(names)
            else:
                # A single unknown name disables the query for this atom.
                queryName = None
            gps.append((atIdx, queryName))
        reactantLabels.append(tuple(gps))
    return reactantLabels


PreprocessReaction(rxn)
ps = rxn.RunReactants((m1, m2,))
usmis = {Chem.MolToSmiles(p[0]) for p in ps}
usmis
Draw.MolsToGridImage([Chem.MolFromSmiles(x) for x in usmis])
Draw.ReactionToImage(rxn)

# ---------------------------------------------------------------------------
# Query the preprocessed reaction about individual molecules.
# ---------------------------------------------------------------------------
mol = Chem.MolFromSmiles('n1ccc(N(C)C)cc1')
mol
rxn.IsMoleculeProduct(mol)
mol = Chem.MolFromSmiles('OC(=O)c1ccc(N(C)C)cc1')
mol
rxn.IsMoleculeProduct(mol)
mol = Chem.MolFromSmiles('c1ccncc1Br')
mol
rxn.IsMoleculeReactant(mol)
mol = Chem.MolFromSmiles('c1ccc(C2CC2)cc1Br')
mol
rxn.IsMoleculeReactant(mol)

# Highlight, in each reactant, the atoms the reaction reports as reacting.
ras = rxn.GetReactingAtoms()
ras
Draw.ReactionToImage(rxn, includeAtomNumbers=True)
match = m1.GetSubstructMatch(rxn.GetReactantTemplate(0))
Draw.MolToImage(m1, highlightAtoms=[match[x] for x in ras[0]])
match = m2.GetSubstructMatch(rxn.GetReactantTemplate(1))
Draw.MolToImage(m2, highlightAtoms=[match[x] for x in ras[1]])