from rdkit import Chem
from rdkit.Chem import AllChem,Draw,Descriptors
from rdkit.Chem.Draw import IPythonConsole
Note that throughout this section I am somewhat lazy with terminology: "chiral" here refers to chiral centers in molecules, not to bulk properties of collections of molecules.
# Query patterns: one without a stereo label and one for each chirality
# label (@ / @@) on the central carbon.
p, p1, p2 = [Chem.MolFromSmarts(pattern_text)
             for pattern_text in ('CC(F)Cl', 'C[C@H](F)Cl', 'C[C@@H](F)Cl')]
# Target molecules in the same order (unlabelled, @, @@); they carry an
# ethyl group where the queries have a single carbon.
m, m1, m2 = [Chem.MolFromSmiles(mol_text)
             for mol_text in ('CCC(F)Cl', 'CC[C@H](F)Cl', 'CC[C@@H](F)Cl')]
The default behavior is the same as in previous versions of the RDKit: substructure matching ignores stereochemistry
# Default matching: report, for each target, whether it matches the
# unlabelled query and the two chiral queries.
# Each row is formatted into one string so the line prints the same text
# under both Python 2 and Python 3 (the bare print statement is Python 2 only).
for label, mol in (('m :', m), ('m1:', m1), ('m2:', m2)):
    print('%s %s %s %s' % (label,
                           mol.HasSubstructMatch(p),
                           mol.HasSubstructMatch(p1),
                           mol.HasSubstructMatch(p2)))
m : True True True m1: True True True m2: True True True
This can be changed with the useChirality option:
# Same comparison as before, but with useChirality=True passed to every
# match call.
# Each row is formatted into one string so the line prints the same text
# under both Python 2 and Python 3 (the bare print statement is Python 2 only).
for label, mol in (('m :', m), ('m1:', m1), ('m2:', m2)):
    print('%s %s %s %s' % (label,
                           mol.HasSubstructMatch(p, useChirality=True),
                           mol.HasSubstructMatch(p1, useChirality=True),
                           mol.HasSubstructMatch(p2, useChirality=True)))
m : True False False m1: True True False m2: True False True
Note how this works: chiral queries only match when atoms with the appropriate chirality are present, but non-chiral queries match everywhere.
Including the explicit H in the query is not necessary as long as the other three atoms are present:
# Chiral queries written without the explicit H on the stereocentre.
p3 = Chem.MolFromSmarts('C[C@](F)Cl')
p4 = Chem.MolFromSmarts('C[C@@](F)Cl')
# Match both labelled targets against both queries with chirality enabled.
# Rows are formatted into one string so they print identically under
# Python 2 and Python 3 (the bare print statement is Python 2 only).
for label, mol in (('m1:', m1), ('m2:', m2)):
    print('%s %s %s' % (label,
                        mol.HasSubstructMatch(p3, useChirality=True),
                        mol.HasSubstructMatch(p4, useChirality=True)))
m1: True False m2: False True
Let's move on to reactions
# Reaction SMARTS split at the ">>" separator for readability; adjacent
# string literals are joined into the same single string.
# The only difference between the two sides is the label on atom 9
# (@ in the reactant template, @@ in the product template).
rxn = AllChem.ReactionFromSmarts(
    "[C:1][C@H:2]1[C:3][C:4][N:5]([C:6])[C:7](=[O:8])[C@H:9]1[C:10]"
    ">>"
    "[C:1][C@H:2]1[C:3][C:4][N:5]([C:6])[C:7](=[O:8])[C@@H:9]1[C:10]"
)
rxn
In this reaction, the stereochemistry (if any) at C9 will be inverted, while the stereochemistry (if any) at C2 will be retained.
# Reactant carrying an @ label on both ring stereocentres.
m = Chem.MolFromSmiles('C[C@H]1CCN(C)C(=O)[C@H]1CC')
# Keep the first product of the first product set returned by the reaction.
p = rxn.RunReactants((m,))[0][0]
# Sanitize the product before drawing it.
Chem.SanitizeMol(p)
# Draw reactant and product in one grid image.
Draw.MolsToGridImage((m,p))
The stereochemistry info is not used in the substructure matching:
# Reactant carrying an @@ label on both ring stereocentres.
m = Chem.MolFromSmiles('C[C@@H]1CCN(C)C(=O)[C@@H]1CC')
# Keep the first product of the first product set returned by the reaction.
p = rxn.RunReactants((m,))[0][0]
# Sanitize the product before drawing it.
Chem.SanitizeMol(p)
# Draw reactant and product in one grid image.
Draw.MolsToGridImage((m,p))
The reaction still applies if stereochemistry info is missing from the reactants:
# Stereo label only on the ring carbon that bears the ethyl group.
m = Chem.MolFromSmiles('CC1CCN(C)C(=O)[C@@H]1CC')
# First product of the first product set; sanitized before drawing.
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
# Stereo label only on the ring carbon that bears the methyl group.
m = Chem.MolFromSmiles('C[C@H]1CCN(C)C(=O)C1CC')
# First product of the first product set; sanitized before drawing.
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
Stereo labels match -> retention of stereochemistry:
# Both templates carry the same @ label on atom 2; the only change is that
# the iodine (map number 4) becomes chlorine.
rxn = AllChem.ReactionFromSmarts("[F:1][C@H:2]([C:3])[I:4]>>[F:1][C@H:2]([C:3])[Cl:4]")
# Reactant labelled @.
m = Chem.MolFromSmiles('F[C@H](CC)I')
# First product of the first product set; sanitized before drawing.
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
# Reactant labelled @@.
m = Chem.MolFromSmiles('F[C@@H](CC)I')
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
# Reactant without a stereo label.
m = Chem.MolFromSmiles('FC(CC)I')
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
Stereo labels change -> Inversion
# The product template flips the label on atom 2 from @ to @@; the iodine
# (map number 4) becomes chlorine.
rxn = AllChem.ReactionFromSmarts("[F:1][C@H:2]([C:3])[I:4]>>[F:1][C@@H:2]([C:3])[Cl:4]")
# Reactant labelled @.
m = Chem.MolFromSmiles('F[C@H](CC)I')
# First product of the first product set; sanitized before drawing.
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
# Reactant labelled @@.
m = Chem.MolFromSmiles('F[C@@H](CC)I')
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
# Reactant without a stereo label.
m = Chem.MolFromSmiles('FC(CC)I')
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
Stereo label removed -> removal of stereochemistry:
# The product template has no stereo label on atom 2 (the reactant template
# has @); the iodine (map number 4) becomes chlorine.
rxn = AllChem.ReactionFromSmarts("[F:1][C@H:2]([C:3])[I:4]>>[F:1][C:2]([C:3])[Cl:4]")
# Reactant labelled @.
m = Chem.MolFromSmiles('F[C@H](CC)I')
# First product of the first product set; sanitized before drawing.
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
# Reactant labelled @@.
m = Chem.MolFromSmiles('F[C@@H](CC)I')
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
# Reactant without a stereo label.
m = Chem.MolFromSmiles('FC(CC)I')
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
Stereo label appears -> creation/setting of stereochemistry:
# The reactant template has no stereo label on atom 2; the product template
# sets @. The iodine (map number 4) becomes chlorine.
rxn = AllChem.ReactionFromSmarts("[F:1][C:2]([C:3])[I:4]>>[F:1][C@H:2]([C:3])[Cl:4]")
# Reactant without a stereo label.
m = Chem.MolFromSmiles('FC(CC)I')
# First product of the first product set; sanitized before drawing.
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
# Reactant labelled @@.
m = Chem.MolFromSmiles('F[C@@H](CC)I')
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
# Reactant labelled @.
m = Chem.MolFromSmiles('F[C@H](CC)I')
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
It's not purely the stereo label that's used in determining whether or not to invert:
# Both templates use the @ label on atom 2, but the product template lists
# the neighbours of atom 2 in a different order (Cl, map 4, before C, map 3).
rxn = AllChem.ReactionFromSmarts("[F:1][C@H:2]([C:3])[I:4]>>[F:1][C@H:2]([Cl:4])[C:3]")
# Reactant labelled @.
m = Chem.MolFromSmiles('F[C@H](CC)I')
# First product of the first product set; sanitized before drawing.
p = rxn.RunReactants((m,))[0][0]
Chem.SanitizeMol(p)
Draw.MolsToGridImage((m,p))
Atom protection can be used to make up for deficiencies in generic reactions or to simulate protecting groups without requiring additional steps.
Demonstrate this with a simple amide-bond formation definition:
# Amide-bond formation: a C(=O)-OH carbon plus a nitrogen that has at least
# one H (!H0) gives C(=O)-N; the unmapped OH is not present in the product.
rxn = AllChem.ReactionFromSmarts('[C:1](=[O:2])-[OH].[N;!H0:3]>>[C:1](=[O:2])-[N:3]')
# Bare expression: shows the reaction as the notebook cell output.
rxn
Here are the reactants, including a building block with two amines:
# Acid component: a benzene ring bearing a carboxylic acid group.
acid = Chem.MolFromSmiles('c1ccccc1C(=O)O')
acid
# Amine building block with two N atoms: atom 0 at the end of the ethyl
# chain and a second one attached directly to the ring.
amine = Chem.MolFromSmiles('NCCc1ccccc1N')
amine
By default this generates two products:
# Keep the first product of every product set returned by the reaction.
ps = [x[0] for x in rxn.RunReactants((acid,amine))]
# Sanitize in a plain loop: the call is made for its side effect on each
# product, so there is no reason to build a throwaway list of return values.
for x in ps:
    Chem.SanitizeMol(x)
Draw.MolsToGridImage(ps)
But we can protect one of the amines by setting the "_protected" atom property :
# Mark atom 0 of the amine (the chain nitrogen in 'NCCc1ccccc1N') as protected.
amine.GetAtomWithIdx(0).SetProp("_protected","1")
# Keep the first product of every product set returned by the reaction.
ps = [x[0] for x in rxn.RunReactants((acid,amine))]
# Sanitize in a plain loop: the call is made for its side effect on each
# product, so there is no reason to build a throwaway list of return values.
for x in ps:
    Chem.SanitizeMol(x)
Draw.MolsToGridImage(ps)
Clearing the property allows that atom to react again:
# Remove the "_protected" property from atom 0 of the amine again.
amine.GetAtomWithIdx(0).ClearProp("_protected")
# Keep the first product of every product set returned by the reaction.
ps = [x[0] for x in rxn.RunReactants((acid,amine))]
# Sanitize in a plain loop: the call is made for its side effect on each
# product, so there is no reason to build a throwaway list of return values.
for x in ps:
    Chem.SanitizeMol(x)
Draw.MolsToGridImage(ps)
Contribution from Nikolas Fechner.
Much more on this coming during Niko's session
import pandas as pd
from rdkit import Chem,rdBase
from rdkit.Chem import PandasTools
# Load the SD file into a DataFrame via PandasTools.
data =PandasTools.LoadSDF('data/d3_aid563770.sdf')
# Preview the first rows.
data.head()
ID | SMILES | ROMol | |
---|---|---|---|
0 | 498399 | COc1ccccc1N1CCN(CCCN2CCc3ccccc3C2=O)CC1 | |
1 | 498400 | COc1ccc(N2CCN(CCCN3CCc4ccccc4C3=O)CC2)cc1 | |
2 | 498401 | O=C1c2ccccc2CCN1CCCN1CCN(c2ccccn2)CC1 | |
3 | 498402 | O=C1c2ccccc2CCN1CCCN1CCN(c2ncccn2)CC1 | |
4 | 498475 | O=C1c2ccccc2CCN1CCCN1CCN(c2cccc(C(F)(F)F)c2)CC1 |
We can do things like substructure searches:
# Pyridine ring used as the substructure query.
query = Chem.MolFromSmiles('c1ncccc1')
# Row filter on the ROMol column; here `>=` is used as the substructure
# search operator (NOTE(review): presumably installed by PandasTools —
# confirm against the PandasTools documentation).
subset = data[data.ROMol >= query]
subset
ID | SMILES | ROMol | |
---|---|---|---|
2 | 498401 | O=C1c2ccccc2CCN1CCCN1CCN(c2ccccn2)CC1 | |
8 | 498479 | O=C1c2ccccc2CCCN1CCCN1CCN(c2ccccn2)CC1 | |
15 | 497391 | O=C1c2ccccc2CCN1CCCCN1CCN(c2ccccn2)CC1 | |
22 | 497602 | O=C1c2ccccc2CCCN1CCCCN1CCN(c2ccccn2)CC1 |
Add descriptors:
# Add one column per descriptor, computed from the molecule in each row.
for column_name, descriptor in (('slogp', Descriptors.MolLogP),
                                ('amw', Descriptors.MolWt)):
    data[column_name] = data['ROMol'].map(descriptor)
data.head()
ID | SMILES | ROMol | slogp | amw | |
---|---|---|---|---|---|
0 | 498399 | COc1ccccc1N1CCN(CCCN2CCc3ccccc3C2=O)CC1 | 2.9058 | 379.504 | |
1 | 498400 | COc1ccc(N2CCN(CCCN3CCc4ccccc4C3=O)CC2)cc1 | 2.9058 | 379.504 | |
2 | 498401 | O=C1c2ccccc2CCN1CCCN1CCN(c2ccccn2)CC1 | 2.2922 | 350.466 | |
3 | 498402 | O=C1c2ccccc2CCN1CCCN1CCN(c2ncccn2)CC1 | 1.6872 | 351.454 | |
4 | 498475 | O=C1c2ccccc2CCN1CCCN1CCN(c2cccc(C(F)(F)F)c2)CC1 | 3.9160 | 417.475 |
And very easily do plots:
# Scatter-style plot (marker style 'o') of slogp against amw.
data.plot(x='amw',y='slogp',style='o')
<matplotlib.axes.AxesSubplot at 0x109455c50>
The FMCS code now has an optional argument that causes it to search for the MCS that appears in at least a given fraction of the input molecules.
Here's an example using some molecules from a ChEMBL beta2 adrenergic data set.
from rdkit.Chem import MCS
# SDMolSupplier yields None for records it cannot parse; drop those so the
# GetProp('_Name') call below (and later uses of ms) never hit a None entry.
ms = [x for x in Chem.SDMolSupplier('data/beta2_adrenergic_aid37833.sdf') if x is not None]
# Grid of all molecules, five per row, labelled with their '_Name' property.
Draw.MolsToGridImage(ms,molsPerRow=5,legends=[x.GetProp('_Name') for x in ms])
Notice that these molecules share a common scaffold, except for 5528; this leads to quite a small MCS:
# MCS over all molecules, with completeRingsOnly=True.
mcs=MCS.FindMCS(ms,completeRingsOnly=True)
# print(...) with one argument emits the same text under Python 2 and 3
# (the bare print statement is Python 2 only).
print(mcs.smarts)
# Build a query molecule from the MCS SMARTS.
mcsM = Chem.MolFromSmarts(mcs.smarts)
# NOTE(review): these two calls are presumably needed so the SMARTS-derived
# molecule can be rendered as the cell output — confirm.
mcsM.UpdatePropertyCache()
Chem.SetHybridization(mcsM)
mcsM
[#7]-!@[#6]:1:[#6]:[#6]:[#6]:[#6]:[#6]:1
Using the threshold argument allows the algorithm to find an MCS that is more representative:
# MCS that only has to be present in a fraction (threshold=0.8) of the molecules.
mcs=MCS.FindMCS(ms,completeRingsOnly=True,threshold=0.8)
# print(...) with one argument emits the same text under Python 2 and 3
# (the bare print statement is Python 2 only).
print(mcs.smarts)
# Build a query molecule from the MCS SMARTS.
mcsM = Chem.MolFromSmarts(mcs.smarts)
# NOTE(review): these two calls are presumably needed so the SMARTS-derived
# molecule can be rendered as the cell output — confirm.
mcsM.UpdatePropertyCache()
Chem.SetHybridization(mcsM)
mcsM
[#6]-@1-@[#6]-@[#7]-@[#6]-@[#6]-@[#7]-@1-!@[#6]:1:[#6]:[#6]:[#6]:[#6]:[#6]:1
Now we can render the molecules nicely aligned:
# Give the MCS query 2D coordinates to use as the depiction template.
AllChem.Compute2DCoords(mcsM)
# Re-depict every molecule that contains the MCS so it matches the template;
# molecules without a match are left unchanged.
for m in ms:
    if m.HasSubstructMatch(mcsM):
        AllChem.GenerateDepictionMatching2DStructure(m,mcsM)
# Grid of all molecules, five per row, labelled with their '_Name' property.
Draw.MolsToGridImage(ms,molsPerRow=5,legends=[x.GetProp('_Name') for x in ms])
Contribution from Paolo Tosco. More information coming in his presentation.
from rdkit.Chem import PyMol
# Viewer object from rdkit.Chem.PyMol, reused by later cells.
v = PyMol.MolViewer()
m = Chem.MolFromSmiles('c1ccccc1C(=O)NCC')
# Explicit hydrogens are added before generating 3D coordinates.
mh = Chem.AddHs(m)
# NOTE(review): `time` is imported here but not used in the visible code.
import time
# Generate a 3D conformer, show it in the viewer and fetch an image.
# NOTE(review): preDelay=2 presumably waits before capturing — confirm.
AllChem.EmbedMolecule(mh)
v.ShowMol(mh)
v.GetPNG(preDelay=2)
# Optimize with MMFF, then show the molecule and fetch an image again.
AllChem.MMFFOptimizeMolecule(mh)
v.ShowMol(mh)
v.GetPNG(preDelay=2)
Getting information about the atom types and charges:
# MMFF properties object for the hydrogen-containing molecule.
mp = AllChem.MMFFGetMoleculeProperties(mh)
# One row per atom: index, element symbol, MMFF atom type, MMFF partial charge.
# The row is formatted into one string so it prints the same text under
# Python 2 and Python 3 (the bare print statement is Python 2 only).
for idx in range(mh.GetNumAtoms()):
    print('%s %s %s %s' % (idx,
                           mh.GetAtomWithIdx(idx).GetSymbol(),
                           mp.GetMMFFAtomType(idx),
                           mp.GetMMFFPartialCharge(idx)))
0 C 37 -0.15 1 C 37 -0.15 2 C 37 -0.15 3 C 37 -0.15 4 C 37 -0.15 5 C 37 0.0862 6 C 3 0.5438 7 O 7 -0.57 8 N 10 -0.7301 9 C 1 0.3001 10 C 1 0.0 11 H 5 0.15 12 H 5 0.15 13 H 5 0.15 14 H 5 0.15 15 H 5 0.15 16 H 28 0.37 17 H 5 0.0 18 H 5 0.0 19 H 5 0.0 20 H 5 0.0 21 H 5 0.0
Contribution from Sereina Riniker
Riniker, S. & Landrum, G. A. "Similarity maps - a visualization strategy for molecular fingerprints and machine-learning methods." J Cheminf (2013). http://www.jcheminf.com/content/5/1/43
from rdkit.Chem.Draw import SimilarityMaps
from rdkit.Chem import rdMolDescriptors
from rdkit import DataStructs
# Topological-torsion fingerprints for molecules 0 and 16.
fp1 = rdMolDescriptors.GetTopologicalTorsionFingerprint(ms[0])
fp2 = rdMolDescriptors.GetTopologicalTorsionFingerprint(ms[16])
# Dice similarity of the two fingerprints; print(...) with one argument
# emits the same text under Python 2 and 3.
print(DataStructs.DiceSimilarity(fp1,fp2))
Draw.MolsToGridImage((ms[0],ms[16]))
0.388059701493
# Similarity map for the ms[0] / ms[16] pair using the topological-torsion
# fingerprint function.
SimilarityMaps.GetSimilarityMapForFingerprint(ms[0],ms[16],SimilarityMaps.GetTTFingerprint)
(<matplotlib.figure.Figure at 0x109786850>, 0.31663113006396593)
# Atom-pair fingerprints for molecules 0 and 16.
fp1 = rdMolDescriptors.GetAtomPairFingerprint(ms[0])
fp2 = rdMolDescriptors.GetAtomPairFingerprint(ms[16])
# Dice similarity of the two fingerprints; print(...) with one argument
# emits the same text under Python 2 and 3.
print(DataStructs.DiceSimilarity(fp1,fp2))
# Similarity map for the same pair using the atom-pair fingerprint function.
SimilarityMaps.GetSimilarityMapForFingerprint(ms[0],ms[16],SimilarityMaps.GetAPFingerprint)
0.338912133891
(<matplotlib.figure.Figure at 0x10a8dd2d0>, 0.043901287904228592)
# Morgan fingerprints (second argument 2) for molecules 0 and 16.
fp1 = rdMolDescriptors.GetMorganFingerprint(ms[0],2)
fp2 = rdMolDescriptors.GetMorganFingerprint(ms[16],2)
# Dice similarity of the two fingerprints; print(...) with one argument
# emits the same text under Python 2 and 3.
print(DataStructs.DiceSimilarity(fp1,fp2))
# Similarity map for the same pair using the Morgan fingerprint function.
SimilarityMaps.GetSimilarityMapForFingerprint(ms[0],ms[16],SimilarityMaps.GetMorganFingerprint)
0.455284552846
(<matplotlib.figure.Figure at 0x10a912650>, 0.17499999999999999)
# Same pair of molecules, Morgan fingerprint function with radius 3, 2, 1
# and 0 in turn; the lambda forwards its two arguments and fixes the radius.
SimilarityMaps.GetSimilarityMapForFingerprint(ms[0],ms[16],lambda x,i:SimilarityMaps.GetMorganFingerprint(x,i,radius=3))
SimilarityMaps.GetSimilarityMapForFingerprint(ms[0],ms[16],lambda x,i:SimilarityMaps.GetMorganFingerprint(x,i,radius=2))
SimilarityMaps.GetSimilarityMapForFingerprint(ms[0],ms[16],lambda x,i:SimilarityMaps.GetMorganFingerprint(x,i,radius=1))
SimilarityMaps.GetSimilarityMapForFingerprint(ms[0],ms[16],lambda x,i:SimilarityMaps.GetMorganFingerprint(x,i,radius=0))
(<matplotlib.figure.Figure at 0x10c86c790>, 0.068421052631578938)
# Topological-torsion fingerprints for molecules 0 and 1.
fp1 = rdMolDescriptors.GetTopologicalTorsionFingerprint(ms[0])
fp2 = rdMolDescriptors.GetTopologicalTorsionFingerprint(ms[1])
# Dice similarity of the two fingerprints; print(...) with one argument
# emits the same text under Python 2 and 3.
print(DataStructs.DiceSimilarity(fp1,fp2))
Draw.MolsToGridImage((ms[0],ms[1]))
0.6
# Similarity map for the ms[0] / ms[1] pair using the topological-torsion
# fingerprint function.
SimilarityMaps.GetSimilarityMapForFingerprint(ms[0],ms[1],SimilarityMaps.GetTTFingerprint)
(<matplotlib.figure.Figure at 0x10afc0c10>, 0.19493670886075948)
We can use the same code to plot any property that is mappable back to an atom.
# Crippen contributions for molecule 0; the map uses the first element of
# each (x, y) pair (NOTE(review): presumably the logP part — confirm).
contribs = rdMolDescriptors._CalcCrippenContribs(ms[0])
_=SimilarityMaps.GetSimilarityMapFromWeights(ms[0],[x for x,y in contribs],colorMap='jet',contourLines=20)
# Labute ASA contributions: the first element of the returned value is used
# as the weights (NOTE(review): presumably the per-atom list — confirm).
contribs = list(rdMolDescriptors._CalcLabuteASAContribs(ms[0]))[0]
_=SimilarityMaps.GetSimilarityMapFromWeights(ms[0],contribs,colorMap='jet',contourLines=10)
# Work on a copy (rebuilt from the binary form) so ms[0] itself is not
# touched by the MMFF sanitization call.
nm = Chem.Mol(ms[0].ToBinary())
AllChem.MMFFSanitizeMolecule(nm)
mp = AllChem.MMFFGetMoleculeProperties(nm)
# One MMFF partial charge per atom, used as the map weights.
charges = [mp.GetMMFFPartialCharge(x) for x in range(nm.GetNumAtoms())]
_=SimilarityMaps.GetSimilarityMapFromWeights(nm,charges,colorMap='jet',contourLines=20)
# print(...) with one argument emits the same text under Python 2 and 3
# (the bare print statement is Python 2 only).
print(charges)
[1.3328000000000002, -0.83819999999999995, -0.33700000000000002, -0.65000000000000002, -0.65000000000000002, 0.36909999999999998, -0.54000000000000004, 0.10000000000000001, 0.27000000000000002, -0.54000000000000004, 0.19900000000000001, 0.1052, 0.36909999999999998, 0.27000000000000002, -0.14349999999999999, 0.0, 0.0, 0.0, 0.0, 0.27000000000000002, 0.41349999999999998, 0.0, 0.0, 0.0, 0.0, 0.0]
The paper includes examples of using the same idea to show contributions of particular atoms to the prediction of a machine-learning model
Atoms have a new data member to store information about residues:
m = Chem.MolFromMolFile('data/seq.mol')
# Attach PDB residue info to atoms 0-6: each gets its own atom name, and all
# of them are assigned to a MET residue.
for atom_idx, atom_name in enumerate(("N", "CA", "O", "CB", "CG", "SD", "CE")):
    residue_info = Chem.AtomPDBResidueInfo(atom_name, residueName="MET")
    m.GetAtomWithIdx(atom_idx).SetMonomerInfo(residue_info)
# Read the info back from the first atom.
mi = m.GetAtomWithIdx(0).GetMonomerInfo()
mi
<rdkit.Chem.rdchem.AtomPDBResidueInfo at 0x10c84cf30>
# Atom name stored in the monomer info of atom 0.
mi.GetName()
'N'
# Residue name stored in the monomer info of atom 0.
mi.GetResidueName()
'MET'
Currently only support for PDB residues as monomers is fleshed out, but everything is in place to support other types of monomers as well.
Support for reading and writing PDB files has also been added:
NOTE: this is currently (30 September 2013) still on a branch. It will be merged with the trunk for the 2013_09 release.
# Read the PDB file into a molecule.
crn = Chem.MolFromPDBFile('./data/1CRN.pdb')
# For every tenth atom, print its index, atom name and residue name.
# The row is formatted into one string so it prints the same text under
# Python 2 and Python 3 (the bare print statement is Python 2 only).
for i in range(0,crn.GetNumAtoms(),10):
    mi = crn.GetAtomWithIdx(i).GetMonomerInfo()
    print('%s %s %s' % (i, mi.GetName(), mi.GetResidueName()))
0 N THR 10 O THR 20 N CYS 30 CB PRO 40 CA ILE 50 O VAL 60 CA ARG 70 N SER 80 CB ASN 90 CD1 PHE 100 CG ASN 110 N CYS 120 CB ARG 130 O LEU 140 CG PRO 150 CB THR 160 N GLU 170 CA ALA 180 CG2 ILE 190 C ALA 200 N TYR 210 CZ TYR 220 CA GLY 230 CA ILE 240 O ILE 250 CG1 ILE 260 N GLY 270 CA THR 280 CB CYS 290 CA GLY 300 OD2 ASP 310 CE2 TYR 320 C ASN
# Show the structure as read from the PDB file.
v.ShowMol(crn)
# Kekulize and clear the aromatic flags before the force-field step.
# NOTE(review): presumably required for MMFF on this PDB-derived molecule — confirm.
Chem.Kekulize(crn,clearAromaticFlags=True)
# MMFF optimization with the iteration limit raised to 2000.
AllChem.MMFFOptimizeMolecule(crn,maxIters=2000)
# Show the optimized coordinates under the name 'opt'.
# NOTE(review): showOnly=False presumably keeps the earlier object visible — confirm.
v.ShowMol(crn,'opt',showOnly=False)
v.GetPNG(preDelay=2)
Do a bigger protein:
# Read a second PDB file and show it in the viewer under the name '2FVD'.
fvd = Chem.MolFromPDBFile('data/2FVD.pdb')
v.ShowMol(fvd,'2FVD')
v.GetPNG(preDelay=2)
# Switch the '2FVD' object to cartoon style, send a 'ray' command to the
# viewer's server and fetch an image again.
v.SetDisplayStyle('2FVD','cartoon')
v.server.do('ray')
v.GetPNG(preDelay=2)
# Write the whole structure out as a non-canonical SMILES string.
smi = Chem.MolToSmiles(fvd,canonical=False) # <- if you forget the "canonical=False" for big proteins, you will be sad
smi
'NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C=O)CC(C)C)CCCNC(N)=N)C(CC)C)CCCCN)CCCCN)CC(C)C)C)C(C)C)C(C)C)CCC(=O)O)C(O)C)CC(C)C)CCCCN)CC(=O)N)CCCNC(N)=N)C)CCCCN)Cc1ccc(O)cc1)C(C)C)C(C)C)Cc1ccc(O)cc1)C(O)C)CCC(=O)O)C(CC)C)CCCCN)CCC(=O)O)C(C)C)CCCCN)CCC(=O)N)Cc1ccccc1)CC(=O)N)CCC(=O)O)CCSC.NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N1C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)N2C(C(=O)NC(C(=O)N3C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N4C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)NC(C=O)C)CC(C)C)Cc5ccccc5)CC(=O)O)C)CC(C)C)CCCCN)C(CC)C)C)CCC(=O)O)C(O)C)CC(=O)N)C(CC)C)CC(C)C)CC(C)C)CC(=O)N)CCC(=O)N)CCC4)CCCCN)CC(C)C)CC(=O)O)CCCNC(N)=N)Cc4nc[nH]c4)CC(C)C)C(C)C)CCCNC(N)=N)Cc4nc[nH]c4)CO)Cc4nc[nH]c4)CS)Cc4ccccc4)C)CC(C)C)CCC(=O)N)CC(C)C)CC(C)C)CCC(=O)N)Cc4ccccc4)CC(C)C)Cc4ccc(O)cc4)CO)CCCCN)C(CC)C)CC(C)C)CCC3)CC(C)C)CCC2)C(CC)C)C(O)C)CC(C)C)C)CO)C)CC(=O)O)CCSC)Cc2ccccc2)CCCCN)CCCCN)CC(C)C)CC(=O)O)CCC(=O)N)Cc2nc[nH]c2)CC(C)C)Cc2ccccc2)CCC(=O)O)Cc2ccccc2)C(C)C)CC(C)C)Cc2ccc(O)cc2)CC(C)C)CCCCN)CC(=O)N)CCC(=O)O)C(O)C)Cc2nc[nH]c2)C(CC)C)C(C)C)CC(=O)O)CC(C)C)CC(C)C)CCCCN)C(C)C)C(CC)C)CC(=O)N)CCC1)Cc1nc[nH]c1)CC(=O)N)CC(C)C)CCC(=O)O)CCCCN)CC(C)C)CC(C)C)C
O)C(CC)C)CCC(=O)O)C.NC(C(=O)NCC(=O)NC(C(=O)N1C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N2C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N3C(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)N4C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N5C(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N6C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N7C(C(=O)NC(C(=O)NC(C(=O)N8C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N9C(C(=O)N%10C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NCC(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N%11C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N%12C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)N%13C(C(=O)NC(C(=O)N%14C(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)NC(C(=O)O)CC(C)C)CCCNC(N)=N)CC(C)C)Cc%15nc[nH]c%15)CCC%14)C(C)C)CCC%13)CCCCN)C(O)C)C(C)C)CC(=O)O)CCC(=O)N)Cc%13ccccc%13)Cc%13ccccc%13)CCC%12)Cc%12nc[nH]c%12)C)CC(C)C)C)C)CCCCN)C)CO)C(CC)C)CCCNC(N)=N)CCCCN)CC(=O)N)CCC%11)CC(=O)O)Cc%11ccc(O)cc%11)Cc%11nc[nH]c%11)CC(C)C)CCSC)CCC(=O)N)CO)CC(C)C)CC(C)C)CO)CCCNC(N)=N)CC(=O)O)CCC(=O)O)CC(=O)O)CC(C)C)CCC%10)CCC9)C(C)C)C(C)C)CCCCN)CO)Cc9ccccc9)CC(=O)O)CCC(=O)N)CCCNC(N)=N)C)Cc9c[nH]c%10c9cccc%10)CCCCN)CCC8)Cc8ccccc8)CO)CCC7)CCCCN)Cc7ccc(O)cc7)CC(=O)O)CCC6)CCSC)CO)C(O)C)C(C)C)CCC5)Cc5c[nH]c6c5cccc6)C(C)C)C(C)C)CCC(=O)O)CC(=O)O)CCC4)C(O)C)CC(C)C)C(O)C)CCCNC(N)=N)Cc4ccccc4)C(CC)C)CCCNC(N)=N)Cc4ccccc4)CC(C)C)CCC(=O)N)CC(=O)O)C(CC)C)CCC(=O)O)CO)CC(=O)O)CCC3)Cc3ccccc3)CC(C)C)C)CCCNC(N)=N)CCCNC(N)=N)C(O)C)C(C)C)CCSC)CCC(=O)O)C)Cc3ccccc3)C(CC)C)CS)CC(C
)C)CO)Cc3c[nH]c4c3cccc4)C(CC)C)CC(=O)O)C(C)C)C)C(O)C)CO)Cc3ccc(O)cc3)Cc3ccc(O)cc3)CCCCN)CS)CC(C)C)CC(C)C)C(CC)C)CCC(=O)O)CCC2)C)CCCNC(N)=N)Cc2ccc(O)cc2)Cc2c[nH]c3c2cccc3)CC(C)C)C(O)C)C(C)C)C(C)C)CCC(=O)O)Cc2nc[nH]c2)C(O)C)Cc2ccc(O)cc2)C(O)C)CCCNC(N)=N)C(C)C)CCC1)C(C)C)Cc1ccccc1.N1(S(C)(O)O)CCC(NC2NCC(C(O)C3C(F)C(F)CCC3OC)C(N)N2)CC1.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O.O'
These are examples of external pieces of code that are built on top of and/or extend the RDKit, but that are not integrated into the distribution.
A "cartridge" that adds chemistry to SQLite
from pysqlite2 import dbapi2 as sqlite3
# In-memory SQLite database.
db = sqlite3.connect(':memory:')
# Allow extension loading, then load the chemicalite shared library.
# NOTE(review): the path is hard-coded to a local build directory — adjust
# it for your installation.
db.enable_load_extension(True)
db.load_extension('/usr/local/src/chemicalite/build/libchemicalite.so')
db.execute("PRAGMA page_size=2048")
# Table with an integer key, the SMILES text and a column declared as type 'mol'.
db.execute('create table mols(id integer primary key,smiles text, m mol)')
# NOTE(review): presumably creates the index on mols.m that the rdtree query
# in the timings relies on — confirm against the chemicalite docs.
db.execute("select create_molecule_rdtree('mols','m')")
In [15]: %timeit curs.execute("select count(*) from mols where mol_is_substruct(m,'c1ccnnc1')")
1 loops, best of 3: 10.4 s per loop
In [16]: %timeit curs.execute("select count(*) from mols,str_idx_mols_m as idx where mols.id=idx.id and mol_is_substruct(mols.m,'c1ccnnc1') and idx.id match rdtree_subset(mol_bfp_signature('c1ccnnc1'))")
100000 loops, best of 3: 8.18 µs per loop
In [19]: q = Chem.MolFromSmiles('c1ccnnc1')
In [20]: %timeit [x for x in ms if x.HasSubstructMatch(q)]
1 loops, best of 3: 4.79 s per loop
Note that this is still an early version and needs refinement.
Code and data from the publication: S. Riniker, G. Landrum, J. Cheminf. (2013) 5:26 http://www.jcheminf.com/content/5/1/26
Available on github: https://github.com/rdkit/benchmarking_platform