myChEMBL IPython Notebook Tutorial

A Chemoinformatics taster using the RDKit toolkit and cartridge, the ChEMBL database and Pandas

George Papadatos, ChEMBL group, EMBL-EBI

Start with something relatively easy

In [2]:
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print('Hello World!')
Hello World!
In [3]:
1+4
Out[3]:
5

Import RDKit libraries

In [4]:
from rdkit.Chem import AllChem as Chem
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Descriptors
from rdkit import DataStructs

Simple RDKit stuff - Molecules, descriptors and similarity

Molecule from SMILES

In [5]:
# Sildenafil as SMILES; `m` is the parsed RDKit molecule used by the cells below.
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4' #sildenafil
m = Chem.MolFromSmiles(smi)
# MolFromSmiles reports a parse failure by returning None instead of raising,
# so stop here rather than letting a later descriptor/fingerprint cell fail.
if m is None:
    raise ValueError('RDKit could not parse SMILES: %r' % smi)
In [6]:
m
Out[6]:

Simple descriptors

In [7]:
Descriptors.MolWt(m)
Out[7]:
474.5870000000004
In [8]:
Descriptors.TPSA(m)
Out[8]:
113.41999999999999
In [9]:
Descriptors.RingCount(m)
Out[9]:
4

Output to various text formats

In [10]:
Chem.MolToSmiles(m, True)
Out[10]:
'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O'
In [11]:
Chem.MolToInchi(m)
Out[11]:
'InChI=1S/C22H30N6O4S/c1-5-7-17-19-20(27(4)25-17)22(29)24-21(23-19)16-14-15(8-9-18(16)32-6-2)33(30,31)28-12-10-26(3)11-13-28/h8-9,14H,5-7,10-13H2,1-4H3,(H,23,24,29)'
In [12]:
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(Chem.MolToMolBlock(m))
     RDKit          

 33 36  0  0  0  0  0  0  0  0999 V2000
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 S   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.0000    0.0000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  1  0
  2  3  1  0
  3  4  1  0
  4  5  2  0
  5  6  1  0
  6  7  1  0
  6  8  1  0
  8  9  1  0
  9 10  2  0
  9 11  1  0
 11 12  1  0
 12 13  2  0
 13 14  1  0
 12 15  1  0
 15 16  2  0
 16 17  1  0
 17 18  2  0
 18 19  1  0
 19 20  2  0
 20 21  1  0
 21 22  1  0
 22 23  1  0
 17 24  1  0
 24 25  2  0
 24 26  2  0
 24 27  1  0
 27 28  1  0
 28 29  1  0
 29 30  1  0
 30 31  1  0
 30 32  1  0
 32 33  1  0
 14  4  1  0
 14  8  2  0
 20 15  1  0
 33 27  1  0
M  END

In [13]:
# Generates 2D coordinates and stores them on `m` in place: the mol block printed
# before this cell has all-zero coordinates, the one printed after it carries real
# x/y values and a "2D" header tag. The returned 0 is presumably the id of the
# new conformer -- confirm against the RDKit docs.
Chem.Compute2DCoords(m)
Out[13]:
0
In [14]:
# Call form of print: same output on Python 2, and valid syntax on Python 3.
print(Chem.MolToMolBlock(m))
     RDKit          2D

 33 36  0  0  0  0  0  0  0  0999 V2000
   -8.2094    2.2189    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -7.5208    0.8863    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -6.0224    0.8163    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -5.3338   -0.5163    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -6.0072   -1.8566    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -4.9405   -2.9112    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -5.1666   -4.3941    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.6079   -2.2226    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.2044   -2.7522    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.9613   -4.2323    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
   -1.0441   -1.8015    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -1.2872   -0.3214    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.6907    0.2082    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -3.8510   -0.7424    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.1269    0.6292    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    1.2765    0.0997    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.4368    1.0503    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    2.1937    2.5305    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.7903    3.0600    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.3700    2.1094    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -1.7734    2.6390    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
   -2.0166    4.1191    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.4200    4.6487    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    3.8402    0.5208    0.0000 S   0  0  0  0  0  0  0  0  0  0  0  0
    3.3107   -0.8826    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    4.3698    1.9242    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0
    5.2436   -0.0088    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    5.4867   -1.4889    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    6.8902   -2.0185    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    8.0505   -1.0679    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
    9.4539   -1.5974    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    7.8074    0.4123    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    6.4039    0.9418    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  1  0
  2  3  1  0
  3  4  1  0
  4  5  2  0
  5  6  1  0
  6  7  1  0
  6  8  1  0
  8  9  1  0
  9 10  2  0
  9 11  1  0
 11 12  1  0
 12 13  2  0
 13 14  1  0
 12 15  1  0
 15 16  2  0
 16 17  1  0
 17 18  2  0
 18 19  1  0
 19 20  2  0
 20 21  1  0
 21 22  1  0
 22 23  1  0
 17 24  1  0
 24 25  2  0
 24 26  2  0
 24 27  1  0
 27 28  1  0
 28 29  1  0
 29 30  1  0
 30 31  1  0
 30 32  1  0
 32 33  1  0
 14  4  1  0
 14  8  2  0
 20 15  1  0
 33 27  1  0
M  END

Fingerprints and similarity

In [15]:
fp = Chem.GetMorganFingerprintAsBitVect(m,2,nBits=2048)
In [16]:
fp.ToBitString()
Out[16]:
'0000000000000010000000000000000000000000000000000000000000000000000000000100000010000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000001000000000000000000000000000000100000000000000000000000001000000000000000000010000100000000000000000000000000000010100001000000001000000000001000000000000000000100000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000001000000000000000000001000000000000000000000000000000000000000000000000000000000000000010000000000000000000001000000000000000000000010000000000000000000100000000000000000000000000000000001000000100000000000000000000001000000000000010000010000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000010000000010000000000000000000000000000000000000000010000000000000000000000000001000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000100000000100000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001010000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000001000000000001000000000000000000000001000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100010000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000010100100000000000000010000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000100010000000000000000000100000000000000000000000000000000000000000000000000001000001000000000000010000000000000000000000000000
0000000000000000000000000000000000000000000000000'
In [17]:
fp.GetNumOnBits()
Out[17]:
61
In [18]:
fp.GetNumBits()
Out[18]:
2048
In [19]:
# Vardenafil as SMILES; `m2` is compared against sildenafil (`m`) below.
smi2 = 'CCCc1nc(C)c2C(=O)N=C(Nn12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(CC)CC4' #vardenafil
m2 = Chem.MolFromSmiles(smi2)
# MolFromSmiles reports a parse failure by returning None instead of raising,
# so stop here rather than letting the fingerprint cell fail on a None molecule.
if m2 is None:
    raise ValueError('RDKit could not parse SMILES: %r' % smi2)
In [20]:
fp2 = Chem.GetMorganFingerprintAsBitVect(m2, 2, nBits=2048)
In [21]:
m2
Out[21]:
In [22]:
m
Out[22]:
In [23]:
DataStructs.TanimotoSimilarity(fp,fp2)
Out[23]:
0.5

Similarity Maps

In [24]:
from rdkit.Chem.Draw import SimilarityMaps
SimilarityMaps.GetSimilarityMapForFingerprint(m2, m, SimilarityMaps.GetMorganFingerprint)
Out[24]:
(<matplotlib.figure.Figure at 0x2b60210>, 0.14414414414414412)

Using the RDKit database cartridge in myChEMBL

In [25]:
sma = 'C1C[!#1!#6]1' #three-membered ring of two carbons plus any atom that is neither H nor C: oxirane, aziridine, thiirane, ...
In [26]:
from IPython.display import Image
try:
    # Python 3 moved the URL-quoting helpers into urllib.parse.
    from urllib.parse import quote_plus
except ImportError:
    # Python 2 location, which this notebook was originally written against.
    from urllib import quote_plus
In [27]:
Image(url='http://www.smartsview.de/smartsview/auto/png/1/dynamic/{0}'.format(quote_plus(sma)))
Out[27]:
In [28]:
import psycopg2
In [29]:
conn = psycopg2.connect(port=5432, user='chembl', dbname='chembl_17')
In [30]:
cur = conn.cursor()
In [31]:
# Substructure query: selects the mols_rdkit rows whose molecule column `m`
# satisfies `@>` against the ring SMARTS (cast to the cartridge's qmol type),
# joined to molecule_dictionary and compound_properties on molregno to pick up
# chembl_id, full_mwt and alogp.
# NOTE(review): the SMARTS literal repeats the value assigned to `sma` above and
# is not read from that variable, so the two have to be edited together.
# NOTE(review): `limit 100` has no ORDER BY, so which 100 matches come back is
# presumably not guaranteed to be stable between runs -- confirm if that matters.
sql1 = """
SELECT mr.*, md.chembl_id, cp.full_mwt, cp.alogp
from mols_rdkit mr, molecule_dictionary md, compound_properties cp
where
mr.m @> 'C1C[!#1!#6]1'::qmol
and
mr.molregno = md.molregno
and
md.molregno = cp.molregno
limit 100
"""
In [32]:
cur.execute(sql1)
In [33]:
# Iterating the cursor yields one result tuple per row. The call form of print
# shows the same tuple repr on Python 2 and is valid syntax on Python 3.
for c in cur:
    print(c)
(1296551, 'CC(=O)OC1C[[email protected]@H](C)C2(CC(c3ccoc3)OC2=O)C2CCC(O)C3(CO3)C12CO', 'CHEMBL1975260', Decimal('420.45'), Decimal('0.57'))
(1240102, 'O=C(CCN1CC1)OCCOC(=O)CCN1CC1', 'CHEMBL1899531', Decimal('256.30'), Decimal('0.01'))
(1296481, 'CCN(CC)C(=O)CCN1CC1', 'CHEMBL1975190', Decimal('170.25'), Decimal('0.35'))
(1235869, 'Clc1ccc(N(CC2CO2)CC2CO2)cc1', 'CHEMBL1895298', Decimal('239.70'), Decimal('2.09'))
(1295075, 'O=C(C1OC1c1ccc([N+](=O)[O-])cc1)C12CC3CC(CC(C3)C1)C2', 'CHEMBL1973784', Decimal('327.37'), Decimal('3.65'))
(1246669, 'C[[email protected]]1OP(=O)(Oc2ccccc2)C[[email protected]@H]2O[[email protected]@H]21', 'CHEMBL1906098', Decimal('240.19'), Decimal('1.20'))
(1231326, 'N#C[[email protected]]1C2OC2c2ccccc2N1C(=O)c1ccccc1', 'CHEMBL1890755', Decimal('276.29'), Decimal('2.19'))
(1218973, 'CC12OC1C(O)C(Br)=C(CO)C2O', 'CHEMBL1878402', Decimal('251.07'), Decimal('-0.80'))
(1295513, 'OC1c2ccccc2C(O)C2OC21', 'CHEMBL1974222', Decimal('178.18'), Decimal('0.24'))
(1296770, 'CC(=O)OC1(C#N)CC2OC1C1C2N1C(=O)c1ccccc1', 'CHEMBL1975479', Decimal('298.29'), Decimal('0.41'))
(1295729, 'O=C(c1ccc(Cl)cc1)C1OC12C(=O)Nc1ccccc12', 'CHEMBL1974438', Decimal('299.71'), Decimal('2.41'))
(1294930, 'CO/C(=N/N1CC1C(F)(F)F)c1ccncc1', 'CHEMBL1973639', Decimal('245.20'), Decimal('1.36'))
(1296542, 'COc1ccc(C[[email protected]]2NC(=O)C=CC[[email protected]@H]([[email protected]](C)[[email protected]]3O[[email protected]@H]3c3ccccc3)OC(=O)[[email protected]](CC(C)C)OC(=O)[[email protected]](C)CNC2=O)cc1', 'CHEMBL1975251', Decimal('620.73'), Decimal('4.85'))
(1295673, 'C=C1C(=O)OC2CCCCC3OC3C12', 'CHEMBL1974382', Decimal('194.23'), Decimal('1.54'))
(1202551, 'Cc1ccc(/C(=N/O)N2CC2C)c(Oc2ccc3oc4ccccc4c3c2)n1', 'CHEMBL1861933', Decimal('373.40'), Decimal('4.24'))
(1290807, 'COC(C/C=C/N(C)C=O)C(C)C(=O)CCC(C)C(OC)C(C)C1OC(=O)C=CC2OC2(C)CC(OC)C(OC)C2=CC(=O)O[[email protected]]([[email protected]]2O)C(C)C(OC)CC(OC)C=CC(C)C(O)CC(OC)C=CC1C', 'CHEMBL1969516', Decimal('1006.27'), None)
(1231532, 'O=C1C=C(N2CC2)c2ccccc2C1=O', 'CHEMBL1890961', Decimal('199.21'), Decimal('1.34'))
(1243825, 'COC(C[[email protected]@H]1O[[email protected]]1C(=O)COCc1ccccc1)OC', 'CHEMBL1903254', Decimal('280.32'), Decimal('1.13'))
(1209176, 'N#CC1(C#N)OC12CCS(=O)(=O)c1ccccc12', 'CHEMBL1868605', Decimal('260.27'), Decimal('0.52'))
(1237949, 'c1cc(OCCN2CC2)ccn1', 'CHEMBL1897378', Decimal('164.20'), Decimal('0.62'))
(1291819, 'O=c1n(Cc2ccccc2)c2ccccc2n1CC1CS1', 'CHEMBL1970528', Decimal('296.39'), Decimal('3.15'))
(1287453, 'C=C1C(=O)O[[email protected]@H]2C=C(C)C=C[[email protected]@H]3O[[email protected]@]3(C)C[[email protected]@H](O)C12', 'CHEMBL1966162', Decimal('262.30'), Decimal('1.12'))
(1296095, 'COC(=O)C1(COCc2ccccc2)CCCC2OC21', 'CHEMBL1974804', Decimal('276.33'), Decimal('2.16'))
(1296635, 'COc1cc(O)c2c(c1)C(O)C(O)CC1OC1C(=O)C=CCC(C)OC2=O', 'CHEMBL1975344', Decimal('378.37'), Decimal('0.90'))
(1302623, 'COc1cc2cc(c1Cl)N(C)C(=O)C[[email protected]](OC(=O)[[email protected]](C)N(C)C(C)=O)[[email protected]]1(C)O[[email protected]]1[[email protected]](C)[[email protected]@H]1C[[email protected]@](O)(NC(=O)O1)[[email protected]](OC)C=CC=C(C)C2', 'CHEMBL1981332', Decimal('692.20'), Decimal('3.07'))
(1290845, 'C=C1CC2(C)CCC(OC(=O)C3(C)OC3C)C(C)(OC(C)=O)C2CC1=C(C)C', 'CHEMBL1969554', Decimal('390.51'), Decimal('4.15'))
(1297691, 'Cc1cn([[email protected]@H]2O[[email protected]](CO[Si](C)(C)C(C)(C)C)[[email protected]]3(O[[email protected]]3C(=O)NO)[[email protected]]2O[Si](C)(C)C(C)(C)C)c(=O)[nH]c1=O', 'CHEMBL1976400', Decimal('557.78'), None)
(1243276, 'CC12CCC3C(CCC4CC5OC5CC43C)C1CCC21OCCO1', 'CHEMBL1902705', Decimal('332.48'), Decimal('3.27'))
(1297762, 'CC1CCC2C(C(=O)O[[email protected]@H]3[[email protected]@H]4O[[email protected]]4(CO)C4C3C=CO[[email protected]]4OC3OC(CO)C(O)C(O)C3O)=CO[[email protected]@H](OC3OC(CO)C(O)C(O)C3O)C12', 'CHEMBL1976471', Decimal('704.67'), Decimal('-3.95'))
(1297121, 'CC1(C)C2CCC3(OCCO3)C1C1OC12', 'CHEMBL1975830', Decimal('210.27'), Decimal('0.88'))
(1297766, 'CC(=O)O[[email protected]@H]1[[email protected]]2O[[email protected]]2[[email protected]]2O[[email protected]@]2(COC(=O)c2ccccc2)[[email protected]]1OC(C)=O', 'CHEMBL1976475', Decimal('362.33'), Decimal('0.59'))
(1297698, 'CC(=O)OC1OC(c2ccoc2)C[[email protected]@]12C1CCC3O[[email protected]]3(C)[[email protected]]1(C)C[[email protected]](OC(C)=O)[[email protected]]2C', 'CHEMBL1976407', Decimal('432.51'), Decimal('2.47'))
(1297541, 'C=CC1(CCC(Br)C(C)(C)Cl)CO1', 'CHEMBL1976250', Decimal('267.59'), Decimal('3.10'))
(1297560, '[O-][P-](Oc1ccc2ccccc2c1)(N1CC1)N1CC1', 'CHEMBL1976269', Decimal('274.25'), Decimal('0.92'))
(1297481, 'CC(=O)c1ccc2c3c1[[email protected]@H]1O[[email protected]@H]1c1cccc(c1-3)[[email protected]@H]1O[[email protected]]21', 'CHEMBL1976190', Decimal('276.29'), Decimal('1.96'))
(1299670, 'CC1=CC2O[[email protected]@H]3C[[email protected]]4OC(=O)C=CC=CC56OCCC7(OC7C(=O)OCC2(CC1)[[email protected]]4(C)[[email protected]]31CO1)C5OC(O)C6O', 'CHEMBL1978379', Decimal('558.57'), Decimal('-0.18'))
(1299584, 'C=C1[[email protected]](OC(C)=O)[[email protected]]2[[email protected]@H](OC(C)=O)[[email protected]](C)(OC(C)=O)C[[email protected]]2(OC(C)=O)C(=O)[[email protected]](C)[[email protected]@H]2O[[email protected]]2C(C)(C)[[email protected]](OC(C)=O)[[email protected]](OC(C)=O)[[email protected]]1OC(=O)C(C)C', 'CHEMBL1978293', Decimal('738.77'), Decimal('1.75'))
(1298308, 'C=C(C(=O)OC)C1C[[email protected]@H](OC(C)=O)C2=C[[email protected]](C[[email protected]@]3(C)O[[email protected]@H]3c3cc(C)c(o3)[[email protected]]1OC(C)=O)OC2=O', 'CHEMBL1977017', Decimal('488.48'), Decimal('2.20'))
(1287993, 'C=C1C(=O)OC2C3OC3(C)CCC=C(C)CC(=O)C12', 'CHEMBL1966702', Decimal('262.30'), Decimal('1.78'))
(1271919, 'CC(=O)OCC1=C(C)C[[email protected]]([[email protected]@H](C)[[email protected]]2CC[[email protected]]3[[email protected]@H]4C[[email protected]]5O[[email protected]]56[[email protected]@H](OC(C)=O)C=CC(=O)[[email protected]]6(COC(C)=O)[[email protected]]4CC[[email protected]]23C)OC1=O', 'CHEMBL1934450', Decimal('612.71'), Decimal('3.43'))
(1271928, 'CC1=C(CO)C(=O)O[[email protected]@H]([[email protected]@H](C)[[email protected]]2CC[[email protected]]3[[email protected]@H]4C[[email protected]]5O[[email protected]]56[[email protected]@H](O)[[email protected]@H](OS(=O)(=O)O)CC(=O)[[email protected]]6(CO)[[email protected]]4CC[[email protected]]23C)C1', 'CHEMBL1934459', Decimal('584.68'), Decimal('1.14'))
(1271936, 'CC1=C(CO[[email protected]@H]2O[[email protected]](CO)[[email protected]@H](O)[[email protected]](O)[[email protected]]2O)C(=O)O[[email protected]@H]([[email protected]@H](C)[[email protected]]2CC[[email protected]]3[[email protected]@H]4C[[email protected]]5O[[email protected]]56[[email protected]@H](O)C=CC(=O)[[email protected]]6(C)[[email protected]]4CC[[email protected]]23C)C1', 'CHEMBL1934467', Decimal('632.74'), Decimal('1.64'))
(1288183, 'O=C1c2c3c4ccccc4[nH]c3c3c(c4ccccc4n3CC3CO3)c2C(=O)N1Cc1ccccc1', 'CHEMBL1966892', Decimal('471.51'), Decimal('5.35'))
(1285594, 'Cc1ccc(OCC2CO2)c(Br)c1', 'CHEMBL1964303', Decimal('243.10'), Decimal('2.76'))
(1284906, 'O=C(CCc1ccc(F)cc1)c1cc(F)ccc1OCC1CO1', 'CHEMBL1963195', Decimal('318.31'), Decimal('3.84'))
(1284905, 'O=C(CCc1ccc(F)cc1)c1ccccc1OCC1CO1', 'CHEMBL1963194', Decimal('300.32'), Decimal('3.63'))
(1284904, 'O=C(CCc1ccccc1)c1ccccc1OCC1CO1', 'CHEMBL1963193', Decimal('282.33'), Decimal('3.43'))
(1299546, 'CCOC(=O)[[email protected]]1O[[email protected]@H]1C(=O)NC(CC(C)C)C(=O)NCCC(C)C', 'CHEMBL1978255', Decimal('342.43'), Decimal('1.61'))
(1216668, 'O=C(O)C1OC1(c1ccccc1Cl)c1ccccc1Cl', 'CHEMBL1876097', Decimal('309.14'), Decimal('3.87'))
(1228380, 'COC(C[[email protected]@H]1O[[email protected]@H]1[[email protected]@H](O)[[email protected]@H](C)OCc1ccccc1)OC', 'CHEMBL1887809', Decimal('296.36'), Decimal('1.44'))
(1211913, 'CCOC(=O)c1ccc(N(CC2CO2)S(=O)(=O)c2ccc(C)cc2)cc1', 'CHEMBL1871342', Decimal('375.44'), Decimal('2.93'))
(1300104, 'COc1cc2c(c3oc4c(O)cccc4c(=O)c13)C(C1(C)CO1)CO2', 'CHEMBL1978813', Decimal('340.33'), Decimal('2.32'))
(1228204, 'CC[[email protected]@H](c1ccccc1)n1c(=O)n2n(c1=O)[[email protected]]1[[email protected]](O)[[email protected]@H]3O[[email protected]@H]3/C(=N\\OCc3ccccc3)[[email protected]]1CC2', 'CHEMBL1887633', Decimal('476.52'), Decimal('2.67'))
(1299787, 'O=C1c2ccccc2OCC12OC21C=CC(Cl)=CC1', 'CHEMBL1978496', Decimal('274.70'), Decimal('2.35'))
(1299302, 'O=C(O)CCCCCCC[[email protected]]1S[[email protected]]1CCCCCCO', 'CHEMBL1978011', Decimal('302.47'), Decimal('4.47'))
(1300017, 'C=C(C)[[email protected]@H]1C[[email protected]]2O[[email protected]](O)(C(=C)CC(=O)C=C(C)C[[email protected]@H]3OC(=O)[[email protected]]24O[[email protected]@H]34)[[email protected]]1O', 'CHEMBL1978726', Decimal('376.40'), Decimal('1.35'))
(1268906, 'Cc1coc2c1[[email protected]]1C=C(CC[[email protected]@H]3O[[email protected]@]3(C)C2)C(=O)O1', 'CHEMBL1927944', Decimal('260.29'), Decimal('2.29'))
(1251649, 'C=C1C(=O)O[[email protected]@H]2C[[email protected]@]3(C)O[[email protected]@H]3CC[[email protected]@]3(C)O[[email protected]]3C[[email protected]@H]12', 'CHEMBL1912039', Decimal('264.32'), Decimal('1.30'))
(1251654, 'C=C1C(=O)O[[email protected]@H]2C[[email protected]](C)[[email protected]]3(CCC(C)O)O[[email protected]@H]3C[[email protected]]12', 'CHEMBL1912044', Decimal('266.33'), Decimal('1.73'))
(1251661, 'C=C1C(=O)O[[email protected]]2C[[email protected]](C)[[email protected]@H]3CC[[email protected]@]4(C)O[[email protected]@]34C[[email protected]]12', 'CHEMBL1912051', Decimal('248.32'), Decimal('2.21'))
(1288152, 'COC(=O)[[email protected]@H]1O[[email protected]]12[[email protected]@H](CO[Si](C)(C)C(C)(C)C)O[[email protected]@H](n1cc(C)c(=O)[nH]c1=O)[[email protected]@H]2O[Si](C)(C)C(C)(C)C', 'CHEMBL1966861', Decimal('556.80'), None)
(1299033, 'COC(=O)/C=C/[[email protected]@H]1[[email protected]](C)N1S(=O)(=O)c1ccc(C)cc1', 'CHEMBL1977742', Decimal('295.35'), Decimal('2.05'))
(1286713, 'CN1c2ccc(Cl)cc2C2(c3ccccc3)N(CC1=O)C2(Cl)Cl', 'CHEMBL1965422', Decimal('367.66'), Decimal('4.24'))
(1301826, 'CC(=O)O[[email protected]]1CC(C)(C)C(=C=C/C(C)=C/C=C/C=C\\C=C(C)\\C=C2\\C=C(/C=C/[[email protected]@]34O[[email protected]]3(C)C[[email protected]@H](O)CC4(C)C)C(=O)O2)[[email protected]](C)(O)C1', 'CHEMBL1980535', Decimal('630.81'), Decimal('5.36'))
(1303258, 'CC(CO)[[email protected]]1OC(=O)C=C2[[email protected]@]13O[[email protected]@H]3[[email protected]]1OC(=O)[[email protected]@]3(C)[[email protected]]4O[[email protected]]4C[[email protected]@]2(C)[[email protected]@H]13', 'CHEMBL1981967', Decimal('362.37'), Decimal('0.17'))
(1295439, 'COC1(OC)C[[email protected]](C)[[email protected]@]23O[[email protected]]24c2cc(O)c5c(c2N[[email protected]]3C#CC=CC#C[[email protected]@]41O)C(=O)c1ccccc1C5=O', 'CHEMBL1974148', Decimal('509.51'), Decimal('2.87'))
(1295527, 'CC(=O)OCC1OC(OC2CC3C(C)(C)C(O)CC[[email protected]]3(C)C3CCC45CC4(CC[[email protected]]5C(C)CC(O)C4OC4(C)C)[[email protected]]23C)C(O)C(O)C1O', 'CHEMBL1974236', Decimal('678.89'), Decimal('2.82'))
(1321195, 'CC(=O)OCC1OC(OC2CC3C(C)(C)C(OC(C)=O)CC[[email protected]]3(C)C3CCC45CC4(CC[[email protected]]5C(C)CC(O)C4OC4(C)C)[[email protected]]23C)C(O)C(O)C1O', 'CHEMBL1999904', Decimal('720.93'), Decimal('3.19'))
(1301478, 'COC1(OC)C[[email protected]](C)[[email protected]@]23O[[email protected]]24C2=CC(=O)C=CC2=N[[email protected]]3C#CC=CC#C[[email protected]@]41O', 'CHEMBL1980187', Decimal('377.39'), Decimal('1.40'))
(1276021, 'C=C(C(=O)[[email protected]](OC(C)=O)[[email protected]@H](C)[[email protected]]1[[email protected]@H](OC(C)=O)C[[email protected]@]2(C)[[email protected]@H]3[[email protected]]4O[[email protected]]4[[email protected]]4[[email protected]](C)C(=O)C=C[[email protected]@]45C[[email protected]@]35CC[[email protected]]12C)[[email protected]@H](C)CO', 'CHEMBL1941159', Decimal('582.72'), Decimal('2.93'))
(1288224, 'CC(=O)O[[email protected]]1C[[email protected]]2(C(C)(C)O)C(=C1C)[[email protected]@H](OC(C)=O)C(OC(C)=O)[[email protected]@]1(C)[[email protected]]([[email protected]@H]2OC(C)=O)[[email protected]@]2(CO2)C(OC(C)=O)C[[email protected]@H]1OC(C)=O', 'CHEMBL1966933', Decimal('652.68'), Decimal('-0.21'))
(1301178, 'Cc1cc2c(c3oc(C4(C)OC4C4OC4C)cc(=O)c13)C(=O)c1c(O)c(C3CC(C)(N(C)C)C(O)C(C)O3)cc(C3CC(N(C)C)C(O)C(C)O3)c1C2=O', 'CHEMBL1979887', Decimal('746.84'), Decimal('2.52'))
(1321506, 'CC1CN1C(=O)NCCCCCCNC(=O)N1CC1C', 'CHEMBL2000215', Decimal('282.38'), Decimal('0.92'))
(1310842, 'C[[email protected]]12CCC3[[email protected]@H](CCC4=CC(=O)CC[[email protected]@]43C)C1CC[[email protected]@H]2OC(=O)C12OC1CCC2=O', 'CHEMBL1989551', Decimal('412.52'), Decimal('3.59'))
(1286420, 'C=C1C(=O)OC2C=C(C)C3OC3C=C(C(=O)OC)C(OC(C)=O)C(OC(=O)C(C)(O)C(C)OC(C)=O)C12', 'CHEMBL1965129', Decimal('522.50'), Decimal('0.91'))
(1311072, 'CC(=O)OC[[email protected]]12C[[email protected]](OC(=O)CC(C)C)C(C)=C[[email protected]]1OC1[[email protected]](O)[[email protected]@H](OC(C)=O)[[email protected]@]2(C)[[email protected]@]12CO2', 'CHEMBL1989781', Decimal('466.52'), Decimal('0.99'))
(1309747, 'CC1=CC2OC3CC(OC(=O)/C=C/C=C/C(OCCC4=CC(=O)OC4)C(C)O)C(C)(C34CO4)C2(CO)CC1', 'CHEMBL1988456', Decimal('530.61'), Decimal('1.70'))
(1310737, 'CC1CCOC(=O)C=CC=CC(=O)OC2CC3OC4C5OC5(C)C(O)CC4(COC(=O)C1O)C2(C)C31CO1', 'CHEMBL1989446', Decimal('534.55'), Decimal('0.02'))
(1312614, 'CC(=O)OCC1=C(C)C[[email protected]]([[email protected]@H](COC(C)=O)C2CCC3C4C[[email protected]]5O[[email protected]]56CC=CC(=O)[[email protected]]6(C)C4CC[[email protected]]23C)OC1=O', 'CHEMBL1991323', Decimal('554.67'), Decimal('3.76'))
(1314509, 'CNC(=O)C(C)C1C(=O)/C(=C(O)/C=C/C(C)=C/C(C)C2OC3(C)OC(C=CC34CO4)C2C)C(=O)N1C1CCC(O)C(C)O1', 'CHEMBL1993218', Decimal('600.70'), Decimal('1.16'))
(1320933, 'CC1=CC2OC3CC4OC(=O)C=CC=CC(C(C)O)OCCC(C)C(O)C(=O)OCC2(CC1O)C4(C)C31CO1', 'CHEMBL1999642', Decimal('548.62'), Decimal('0.80'))
(1322083, 'CC1(C)CCCC2OC2CCC(C)(C)C1=O', 'CHEMBL2000792', Decimal('224.34'), Decimal('3.46'))
(1314654, 'O=c1sc2ccccc2n1CC1CS1', 'CHEMBL1993363', Decimal('223.31'), Decimal('2.57'))
(1301689, 'CC(=O)O[[email protected]]1[[email protected]@H]2O[[email protected]@]2(C)CCC=C(C)C[[email protected]](OC(C)=O)[[email protected]]1C(C)C', 'CHEMBL1980398', Decimal('338.44'), Decimal('2.92'))
(1314212, 'O=C(c1ccccc1)C1OC12C(=O)Nc1ccccc12', 'CHEMBL1992921', Decimal('265.26'), Decimal('1.75'))
(1300173, 'C=C1C(=O)OC2CC3(C)OC3C3OC3C3=CC(OC3=O)C12', 'CHEMBL1978882', Decimal('290.27'), Decimal('0.13'))
(1322034, 'C[[email protected]](O)[[email protected]]1C=CC=CC(=O)O[[email protected]@H]2C[[email protected]]3O[[email protected]@H]4[[email protected]@H]5O[[email protected]]5(C)CCC4(COC(=O)[[email protected]]4O[[email protected]]4(C)[[email protected]@H](O)CO1)[[email protected]]2(C)[[email protected]]31CO1', 'CHEMBL2000743', Decimal('562.61'), Decimal('-0.21'))
(1313881, 'COc1ccc2c(c1)C1(C)CCC(O2)C12CO2', 'CHEMBL1992590', Decimal('232.28'), Decimal('2.11'))
(1312125, 'C=CCOC(=O)N1c2ccc(O)cc2[[email protected]@]23O[[email protected]]24[[email protected]@H](C)CC(OC)(OC)[[email protected]@]3(O)C#CC=CC#C[[email protected]]14', 'CHEMBL1990834', Decimal('463.48'), Decimal('2.66'))
(1313562, 'CC(=O)OC(C[[email protected]](C)[[email protected]@H]1CC[[email protected]]2(C)C3=CCC4C(C)(C)C(=O)CC[[email protected]]4(C)C3CCC12C)C1OC1(C)C', 'CHEMBL1992271', Decimal('498.74'), Decimal('6.14'))
(1300502, 'Cc1cn([[email protected]@H]2O[[email protected]](CO[Si](C)(C)C(C)(C)C)[[email protected]]3(O[[email protected]]3C(=O)NN)[[email protected]]2O[Si](C)(C)C(C)(C)C)c(=O)[nH]c1=O', 'CHEMBL1979211', Decimal('556.80'), None)
(1313878, 'CC1OC12CC(C)C(C)(O)C(=O)OCC1=CCN(C)CCC(OC2=O)C1=O', 'CHEMBL1992587', Decimal('381.42'), Decimal('0.63'))
(1313202, 'CC(=O)OC1CC2C(C)(C)C(=O)C=C[[email protected]]2(C)C2CCC3(C)C(c4ccoc4)[[email protected]@H](O)[[email protected]]4O[[email protected]]43[[email protected]]12C', 'CHEMBL1991911', Decimal('468.58'), Decimal('3.09'))
(1302651, 'C=C(C)C12OC1[[email protected]@]1(C)C(=CC2=O)CCC(O)[[email protected]@H]1C', 'CHEMBL1981360', Decimal('248.32'), Decimal('1.99'))
(1309841, 'C[[email protected]]12CCC3C4=C(CCC3C1CCC2=O)C(=O)[[email protected]]1O[[email protected]]1C4=O', 'CHEMBL1988550', Decimal('300.35'), Decimal('2.02'))
(1313550, 'CC1=C[[email protected]]2O[[email protected]@H]3C[[email protected]]4OC(=O)C=CC=CC(C(C)O)OCC[[email protected]@H](C)[[email protected]](O)C(=O)OC[[email protected]@]2(CC1)[[email protected]]4(C)[[email protected]]31CO1', 'CHEMBL1992259', Decimal('532.62'), Decimal('1.90'))
(1321613, 'C/C=C/C/C=C/CCC(=O)C1OC1C(N)=O', 'CHEMBL2000322', Decimal('223.27'), Decimal('1.09'))
(1309900, 'COc1ccc(/C=C/C(=O)c2c(-c3ccccc3)nn(C)c(=O)c2N2CC2C)cc1', 'CHEMBL1988609', Decimal('401.46'), Decimal('3.43'))
(1320419, 'CC1CCC2(C)C(CC=C(C=O)C2(O)C=O)C12CO2', 'CHEMBL1999128', Decimal('264.32'), Decimal('0.99'))
(1314159, 'CC(=O)OC1(C#N)CC2OC1C1C2N1C(=O)OC(C)(C)C', 'CHEMBL1992868', Decimal('294.30'), Decimal('0.32'))
In [34]:
# Same sildenafil SMILES that was assigned to `smi` at the top of the notebook.
# NOTE(review): the query in the next cell embeds this SMILES as a literal and
# does not read `smi`, so changing this line alone does not change the search.
smi = 'CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4' #sildenafil
In [35]:
# Similarity query against the stored `mfp2` fingerprints in fps_rdkit (joined to
# mols_rdkit on molregno for the structure): the Tanimoto score between the
# query's Morgan bit-vector fingerprint and `mfp2` is returned as `similarity`,
# `%` filters the hits and `<%>` orders them (the output below runs from 1.0
# downwards). The cut-off applied by `%` is not set anywhere in this notebook, so
# it is presumably the cartridge's configured default -- confirm.
# NOTE(review): the sildenafil SMILES is written out three times in this string
# and is not read from the `smi` variable assigned in the previous cell.
sql2 = """
select molregno,m as smiles,tanimoto_sml(morganbv_fp('CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'::mol),mfp2) as similarity
from fps_rdkit join mols_rdkit using (molregno)
where morganbv_fp('CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'::mol)%mfp2
order by morganbv_fp('CCCc1nn(C)c2C(=O)NC(=Nc12)c3cc(ccc3OCC)S(=O)(=O)N4CCN(C)CC4'::mol)<%>mfp2;
"""
In [36]:
# Run the similarity query on the open cursor; the rows are read in the next cell.
cur.execute(sql2)
In [37]:
# Print every row returned by the similarity query, one result tuple per line.
for hit in cur:
    print(hit)
(410802, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 1.0)
(1351310, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O', 0.88135593220339)
(1351311, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O', 0.88135593220339)
(80636, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O', 0.866666666666667)
(80694, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O', 0.838709677419355)
(488008, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4)CC3)ccc1OCC)[nH]c2=O', 0.825396825396825)
(410662, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCC(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.8125)
(512303, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(C(N)=O)CC3)ccc1OCC)[nH]c2=O', 0.8125)
(1334756, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(C)nn(C)c2c(=O)[nH]1', 0.8)
(488151, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C4CCCCC4)CC3)ccc1OCC)[nH]c2=O', 0.8)
(410656, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(C(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.8)
(488072, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccc(F)cc4)CC3)ccc1OCC)[nH]c2=O', 0.8)
(488147, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccc(Cl)cc4)CC3)ccc1OCC)[nH]c2=O', 0.787878787878788)
(488073, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4Cl)CC3)ccc1OCC)[nH]c2=O', 0.787878787878788)
(1351309, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N(CC)CC)ccc1OCC)[nH]c2=O', 0.783333333333333)
(488146, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4cccc(Cl)c4)CC3)ccc1OCC)[nH]c2=O', 0.776119402985075)
(488010, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4OC)CC3)ccc1OCC)[nH]c2=O', 0.776119402985075)
(488009, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4C)CC3)ccc1OCC)[nH]c2=O', 0.776119402985075)
(410657, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(CC(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.776119402985075)
(488071, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccccc4F)CC3)ccc1OCC)[nH]c2=O', 0.764705882352941)
(488149, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4cccc5ccccc54)CC3)ccc1OCC)[nH]c2=O', 0.764705882352941)
(1351312, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N(C)C)ccc1OCC)[nH]c2=O', 0.758064516129032)
(488148, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4ccc([N+](=O)[O-])cc4)CC3)ccc1OCC)[nH]c2=O', 0.753623188405797)
(410658, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(CCC(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.753623188405797)
(488152, 'CCCCCCCCCC(=O)OCCN1CCN(S(=O)(=O)c2ccc(OCC)c(-c3nc4c(CCC)nn(C)c4c(=O)[nH]3)c2)CC1', 0.742857142857143)
(1334601, 'CCCc1nn(-c2cccnc2)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 0.742857142857143)
(1334602, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(-c3ccccc3)nn(C)c2c(=O)[nH]1', 0.738461538461539)
(410664, 'CCCOc1ccc(S(=O)(=O)N2CCC(C(=O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.735294117647059)
(1334603, 'CCCc1n[nH]c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 0.734375)
(410660, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCC(CCCC(=O)O)CC3)ccc1OCC)[nH]c2=O', 0.732394366197183)
(283528, 'CCCc1nc(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nn12', 0.727272727272727)
(410675, 'CCCOc1ccc(S(=O)(=O)N2CCC(CC(=O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.72463768115942)
(410742, 'CCCOc1ccc(S(=O)(=O)N2CCN(CCP(=O)(O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.72463768115942)
(488011, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(c4cccc(C(F)(F)F)c4)CC3)ccc1OCC)[nH]c2=O', 0.722222222222222)
(410755, 'CCCOc1ccc(S(=O)(=O)N2CCC(CP(=O)(O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.714285714285714)
(410746, 'CCCOc1ccc(S(=O)(=O)N2CCC(P(=O)(O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.714285714285714)
(410735, 'CCCOc1ccc(S(=O)(=O)N2CCC(P(=O)(OCC)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.714285714285714)
(410731, 'CCCOc1ccc(S(=O)(=O)N2CCN(CCP(=O)(OCC)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.714285714285714)
(488150, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(Cc4ccc5c(c4)OCO5)CC3)ccc1OCC)[nH]c2=O', 0.712328767123288)
(410715, 'CCCOc1ccc(S(=O)(=O)N2CCC(P(=O)(O)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.704225352112676)
(410737, 'CCCOc1ccc(S(=O)(=O)N2CCC(CP(=O)(OCC)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.704225352112676)
(1334755, 'CCCc1nn(C)c2c1nc(-c1cccc(S(=O)(=O)N3CCN(C)CC3)c1)[nH]c2=O', 0.698412698412698)
(1334754, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2cnn(C)c2c(=O)[nH]1', 0.698412698412698)
(410711, 'CCCOc1ccc(S(=O)(=O)N2CCN(CP(=O)(O)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.694444444444444)
(410713, 'CCCOc1ccc(S(=O)(=O)N2CCN(CCP(=O)(O)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.694444444444444)
(410676, 'CCCOc1ccc(S(=O)(=O)N2CCC(CCC(=O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.694444444444444)
(487042, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCNc3ccccc3)ccc1OCC)[nH]c2=O', 0.691176470588235)
(1351313, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCc3ccccc3)ccc1OCC)[nH]c2=O', 0.691176470588235)
(410717, 'CCCOc1ccc(S(=O)(=O)N2CCC(CP(=O)(O)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.684931506849315)
(488153, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCOC(=O)CCCO[N+](=O)[O-])CC3)ccc1OCC)[nH]c2=O', 0.684210526315789)
(1441770, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CCC)c(CC)c(=O)[nH]1', 0.676923076923077)
(567449, 'CCCc1c(OC)cc(OC)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.676470588235294)
(410677, 'CCCOc1ccc(S(=O)(=O)N2CCC(CCCC(=O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.675675675675676)
(304727, 'CCCn1nc(CC)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 0.671641791044776)
(410679, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)cc3c1OCC3)[nH]c2=O', 0.666666666666667)
(80598, 'CCCc1nn(C)c2c1nc(-c1ccccc1OCC)[nH]c2=O', 0.666666666666667)
(1441766, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(CCC)c(=O)[nH]1', 0.666666666666667)
(140806, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(C)c(=O)[nH]c2=O', 0.666666666666667)
(487043, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCNc3cccc4ccccc43)ccc1OCC)[nH]c2=O', 0.666666666666667)
(487044, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCNCC(=O)O)ccc1OCC)[nH]c2=O', 0.661971830985915)
(487046, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCN(CCO)S(=O)(=O)c3ccc(OCC)c(-c4nc5c(CCC)nn(C)c5c(=O)[nH]4)c3)ccc1OCC)[nH]c2=O', 0.657534246575342)
(410681, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)cc3c1OCO3)[nH]c2=O', 0.656716417910448)
(283502, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(ncn2CCC)c(=O)[nH]1', 0.656716417910448)
(410683, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)cc3c1OCCO3)[nH]c2=O', 0.656716417910448)
(553751, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3ccccn32)c(=O)[nH]1', 0.656716417910448)
(1441475, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CCC)cc(=O)[nH]1', 0.65625)
(555103, 'CCCc1c(OC)cc(O)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.647887323943662)
(304716, 'CCCn1nc(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O', 0.647058823529412)
(1262965, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(Cl)c(=O)[nH]1', 0.646153846153846)
(304811, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(c(CC)nn2C2CCCC2)c(=O)[nH]1', 0.642857142857143)
(1441765, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(CC)c(=O)[nH]1', 0.636363636363636)
(1262967, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(I)c(=O)[nH]1', 0.636363636363636)
(1262966, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(Br)c(=O)[nH]1', 0.636363636363636)
(1441764, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(C)c(=O)[nH]1', 0.636363636363636)
(558511, 'CCCCc1c(OC)cc(OC)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.633802816901408)
(80661, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C(N)=O)CC3)ccc1OCC)nc2O', 0.633802816901408)
(556780, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(Br)c(OC)cc(OC)c2c(=O)[nH]1', 0.632352941176471)
(563798, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(C)c(OC)cc(OC)c2c(=O)[nH]1', 0.632352941176471)
(1441774, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Cl)c(=O)[nH]1', 0.630769230769231)
(1351308, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)Nc3ncccc3C3CCCN3C)ccc1OCC)[nH]c2=O', 0.628205128205128)
(140060, 'CCCn1c2nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)[nH]c2c(=O)[nH]c1=O', 0.626865671641791)
(1262964, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(F)c(=O)[nH]1', 0.626865671641791)
(562127, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(Cl)c(OC)cc(OC)c2c(=O)[nH]1', 0.623188405797101)
(558510, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(CC)c(OC)cc(OC)c2c(=O)[nH]1', 0.623188405797101)
(558503, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(I)c(OC)cc(OC)c2c(=O)[nH]1', 0.623188405797101)
(558494, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(F)c(OC)cc(OC)c2c(=O)[nH]1', 0.623188405797101)
(560323, 'C=Cc1c(OC)cc(OC)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.623188405797101)
(1441771, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Br)c(=O)[nH]1', 0.621212121212121)
(487045, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCN(CC(=O)O)S(=O)(=O)c3ccc(OCC)c(-c4nc5c(CCC)nn(C)c5c(=O)[nH]4)c3)ccc1OCC)[nH]c2=O', 0.618421052631579)
(1376119, 'CCCOc1ccc(S(=O)(=O)NCCC2CCCN2C)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.618421052631579)
(140771, 'CCCn1c2nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)[nH]c2c(=O)n(C)c1=O', 0.617647058823529)
(410694, 'CCCOc1ccc(NC(C)=O)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.617647058823529)
(511952, 'CCCCN1C(=O)c2nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)[nH]c(=O)c2C1=O', 0.617647058823529)
(1441775, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(c(=O)[nH]1)CCC2', 0.617647058823529)
(1262963, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)cc(=O)[nH]1', 0.615384615384615)
(1441474, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C)cc(=O)[nH]1', 0.615384615384615)
(562135, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2cc(OC)cc(OC)c2c(=O)[nH]1', 0.611940298507463)
(1441769, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C)c(CC)c(=O)[nH]1', 0.611940298507463)
(1441768, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(CC)c(=O)[nH]1', 0.608695652173913)
(563835, 'CCCCc1c(OC)cc(O)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.608108108108108)
(1441472, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(O)cc(=O)[nH]1', 0.606060606060606)
(1441471, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(N)cc(=O)[nH]1', 0.606060606060606)
(555082, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(Br)c(OC)cc(O)c2c(=O)[nH]1', 0.605633802816901)
(139899, 'CCCCCCn1c2nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)[nH]c2c(=O)n(C)c1=O', 0.605633802816901)
(565660, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(C)c(OC)cc(O)c2c(=O)[nH]1', 0.605633802816901)
(1334600, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(-c3ccccc3)n[nH]c2c(=O)[nH]1', 0.605633802816901)
(1441761, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Cl)c(=O)[nH]1', 0.602941176470588)
(1441480, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC)c(NC(C)=O)c(=O)[nH]1', 0.6)
(140172, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)[nH]c2=O', 0.6)
(140026, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC1CC1)c(=O)n(C)c2=O', 0.6)
(556799, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(Cl)c(OC)cc(O)c2c(=O)[nH]1', 0.597222222222222)
(563829, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(CC)c(OC)cc(O)c2c(=O)[nH]1', 0.597222222222222)
(565656, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(F)c(OC)cc(O)c2c(=O)[nH]1', 0.597222222222222)
(562146, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(I)c(OC)cc(O)c2c(=O)[nH]1', 0.597222222222222)
(1441767, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(C)c(=O)[nH]1', 0.594202898550725)
(1441763, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(I)c(=O)[nH]1', 0.594202898550725)
(1441762, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Br)c(=O)[nH]1', 0.594202898550725)
(1441473, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(NC(C)=O)cc(=O)[nH]1', 0.594202898550725)
(140805, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)(C)C)c(=O)n(C)c2=O', 0.591549295774648)
(1441773, 'CCCCCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Br)c(=O)[nH]1', 0.591549295774648)
(140087, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.591549295774648)
(410684, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)cc3c1OCC3)[nH]c2=O', 0.589041095890411)
(560347, 'C=Cc1c(OC)cc(O)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12', 0.589041095890411)
(1441476, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)cc(=O)[nH]1', 0.588235294117647)
(1441479, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(F)(F)F)cc(=O)[nH]1', 0.588235294117647)
(555093, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2cc(OC)cc(O)c2c(=O)[nH]1', 0.585714285714286)
(1441772, 'CCCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(C(C)C)c(Br)c(=O)[nH]1', 0.585714285714286)
(139960, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC1CCCCC1)c(=O)n(C)c2=O', 0.583333333333333)
(140807, 'C=CCCn1c2[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc2c(=O)n(C)c1=O', 0.583333333333333)
(140260, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)CC)c(=O)n(C)c2=O', 0.583333333333333)
(80559, 'CCCc1nn(C)c2c1nc(-c1ccccc1OCC1CC1)[nH]c2=O', 0.582089552238806)
(1441477, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(CC(C)C)cc(=O)[nH]1', 0.579710144927536)
(1334762, 'CCCc1nn(C)c2c1nc(-c1cccnc1OCC)[nH]c2=O', 0.578125)
(410698, 'CCCOc1ccc(NC(=O)CCC)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.577464788732394)
(410696, 'CCCOc1ccc(NC(=O)CC)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.577464788732394)
(1441478, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(-c2ccccc2)cc(=O)[nH]1', 0.571428571428571)
(304703, 'CCCc1nc(C)n2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)nc2O', 0.571428571428571)
(410702, 'CCCOc1ccc(NC(=O)C(C)C)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.569444444444444)
(80558, 'CCCc1nn(C)c2c1nc(-c1ccccc1NS(C)(=O)=O)[nH]c2=O', 0.569230769230769)
(140519, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.567567567567568)
(410687, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)cc3c1OCO3)[nH]c2=O', 0.567567567567568)
(410689, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)cc3c1OCCO3)[nH]c2=O', 0.567567567567568)
(410686, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)cc3c1OCCC3)[nH]c2=O', 0.565789473684211)
(304580, 'CCCn1cnc2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)nc2O', 0.563380281690141)
(304707, 'CCCc1nc(CC)c2c(O)nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nn12', 0.561643835616438)
(140816, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(Cc1ccc(OC)cc1)c(=O)n(C)c2=O', 0.56)
(102837, 'CCCOc1ccc(S(=O)(=O)N2CCN(CP(=O)(O)O)CC2)cc1-c1nc2c(CCC)nn(C)c2c(O)n1', 0.545454545454545)
(325914, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3c(C)cccn32)c(O)n1', 0.540540540540541)
(512228, 'CCCc1nn(C)c2c1nc(-c1ccccc1O)[nH]c2=O', 0.53968253968254)
(410708, 'CCCOc1ccc(NC(=O)C2CCCCC2)cc1-c1nc2c(CCC)nn(C)c2c(=O)[nH]1', 0.539473684210526)
(102529, 'CCCOc1ccc(S(=O)(=O)N2CCN(CP(=O)(OCC)OCC)CC2)cc1-c1nc2c(CCC)nn(C)c2c(O)n1', 0.538461538461538)
(1334759, 'CCCc1nn(C)c2c1nc(-c1cccnc1OC)[nH]c2=O', 0.537313432835821)
(1008491, 'CCCc1nn(C)c2c1nc(-c1cc(S(N)(=O)=O)ccc1OCC)nc2O', 0.536231884057971)
(326033, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3cc(C)ccn32)c(O)n1', 0.533333333333333)
(1334767, 'CCCc1nn(C)c2c1nc(-c1cccnc1O)[nH]c2=O', 0.53030303030303)
(325968, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3ccc(Br)cn32)c(O)n1', 0.526315789473684)
(1334776, 'CCCc1nn(C)c2c1nc(-c1ccsc1)[nH]c2=O', 0.523809523809524)
(1334773, 'CCCc1nn(C)c2c1nc(-c1cn(C)cn1)[nH]c2=O', 0.523809523809524)
(80670, 'CCCc1nn(C)c2c1nc(-c1ccccc1[N+](=O)[O-])[nH]c2=O', 0.522388059701492)
(1334761, 'CCCc1nn(C)c2c1nc(-c1cccnc1OC(C)C)[nH]c2=O', 0.521739130434783)
(971340, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NC)ccc1OCC)nc2O', 0.52112676056338)
(800246, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N(CCO)CCO)ccc1OCC)nc2O', 0.520547945205479)
(326091, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3ccc(C)cn32)c(O)n1', 0.52)
(140606, 'CC(C)COc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.52)
(1227092, 'CCCc1nn(C)c2c1nc(-c1cc([S+](=O)([O-])N3CCN(C)CC3)ccc1OCC)nc2O', 0.52)
(1353820, 'CCCc1nn(C)c2c1nc(-c1ccc(C)cc1)[nH]c2=O', 0.516129032258065)
(512227, 'CCCc1nn(C)c2c1nc(-c1ccccc1)[nH]c2=O', 0.516129032258065)
(1353819, 'CCCc1nn(C)c2c1nc(-c1ccc(OC)cc1)[nH]c2=O', 0.515625)
(1334771, 'CCCc1nn(C)c2c1nc(-c1cn(C)nc1C)[nH]c2=O', 0.515625)
(1269220, 'CCCn1nc2c(nc(-c3cc(S(=O)(=O)N4CCN(C)CC4)cnc3OCCOC)[nH]c2=O)c1CC', 0.513513513513513)
(1212525, 'CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)NCCO)ccc1OCC)nc2O', 0.513513513513513)
(140345, 'CCOc1ccc(S(=O)(=O)N2CCN(CC)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.513157894736842)
(1353817, 'CCCc1nn(C)c2c1nc(-c1ccc(Br)cc1)[nH]c2=O', 0.507936507936508)
(1334775, 'CCCc1nn(C)c2c1nc(-c1ccnn1C)[nH]c2=O', 0.507936507936508)
(421326, 'CCCc1nc(C)c2c(=O)nc(-c3cc(S(=O)(=O)N4CCN(CC)CC4)ccc3OCC)[nH]n12', 0.506666666666667)
(28710, 'CCCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc(O)c2cc3[nH]cnc3cc2n1', 0.506666666666667)
(140382, 'CCOc1ccc(S(=O)(=O)N2CCN(CCN(C)C)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.506329113924051)
(453288, 'CCCc1c2nc(-c3cc(S(=O)(=O)N4CCN(CC)CC4)cnc3OCCOC)[nH]c(=O)c2nn1C', 0.5)
(511950, 'CCCCN1C(=O)C2=C(N=C(c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)NC2)C1=O', 0.5)
(1353816, 'CCCc1nn(C)c2c1nc(-c1ccc(Cl)cc1)[nH]c2=O', 0.5)
(326382, 'CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3cccc(CC)n32)c(O)n1', 0.5)
(140451, 'CCOc1ccc(S(=O)(=O)N2CCN(CCO)CC2)cc1-c1nc2c([nH]1)n(CC(C)C)c(=O)n(C)c2=O', 0.5)
(1351307, 'CCOc1ccc(S(=O)(=O)Nc2ccc(O)c(C(=O)O)c2)cc1-c1nc2c(C(C)(C)C)nn(C)c2c(=O)[nH]1', 0.5)
(1353821, 'CCCc1nn(C)c2c1nc(-c1cccc(Br)c1)[nH]c2=O', 0.5)
(1334770, 'CCCc1nn(C)c2c1nc(-c1ccc(=O)[nH]n1)[nH]c2=O', 0.5)
(1334766, 'CCCc1nn(C)c2c1nc(-c1ccc(O)cc1)[nH]c2=O', 0.5)
(1353818, 'CCCc1nn(C)c2c1nc(-c1cccc(Cl)c1)[nH]c2=O', 0.5)

More about the RDKit Cartridge here: http://www.rdkit.org/docs/Cartridge.html

Working with Pandas

In [38]:
from rdkit.Chem import PandasTools
import pandas as pd
In [39]:
# Run the same similarity query again, this time loading the result set
# straight into a DataFrame.
data = pd.read_sql(sql=sql2, con=conn)
In [40]:
# (rows, columns) of the query result.
data.shape
Out[40]:
(187, 3)
In [41]:
# Preview the first rows of the result table.
data.head()
Out[41]:
molregno smiles similarity
0 410802 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O 1.000000
1 1351310 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O 0.881356
2 1351311 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O 0.881356
3 80636 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O 0.866667
4 80694 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O 0.838710

Filter the table

In [42]:
# Keep only the closest hits: rows whose similarity to the query is at least 0.8.
is_near_neighbour = data['similarity'] >= 0.8
nn = data[is_near_neighbour]
In [43]:
# (rows, columns) of the filtered table.
nn.shape
Out[43]:
(12, 3)
In [44]:
# Preview the first rows of the filtered table.
nn.head()
Out[44]:
molregno smiles similarity
0 410802 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O 1.000000
1 1351310 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O 0.881356
2 1351311 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O 0.881356
3 80636 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O 0.866667
4 80694 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O 0.838710

Add RDKit molecules and descriptors to the table

In [45]:
# Parse every SMILES string into an RDKit molecule stored in a new 'mol'
# column; the frame is modified in place. includeFingerprints=True asks
# PandasTools to precompute fingerprints for the molecules (per the RDKit
# documentation this speeds up later substructure searches).
PandasTools.AddMoleculeColumnToFrame(
    data,
    smilesCol='smiles',
    molCol='mol',
    includeFingerprints=True
)
In [46]:
# The table now carries an additional 'mol' column.
data.head()
Out[46]:
molregno smiles similarity mol
0 410802 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O 1.000000 Mol
1 1351310 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O 0.881356 Mol
2 1351311 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O 0.881356 Mol
3 80636 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O 0.866667 Mol
4 80694 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O 0.838710 Mol
In [47]:
# Derive two descriptor columns from the molecule column:
# calculated logP and molecular weight.
mol_column = data['mol']
data['logp'] = mol_column.map(Descriptors.MolLogP)
data['mw'] = mol_column.map(Descriptors.MolWt)
In [48]:
# 'logp' and 'mw' now appear as additional columns.
data.head()
Out[48]:
molregno smiles similarity mol logp mw
0 410802 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O 1.000000 Mol 1.6109 474.587
1 1351310 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O 0.881356 Mol 3.2395 473.599
2 1351311 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O 0.881356 Mol 2.8494 459.572
3 80636 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O 0.866667 Mol 1.2687 460.560
4 80694 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O 0.838710 Mol 0.9734 504.613
In [49]:
# Show the five least similar hits (ascending sort on the similarity column).
# DataFrame.sort(columns=...) was deprecated in pandas 0.17 and removed in
# later releases in favour of sort_values(by=...); use whichever method this
# pandas installation provides so the cell runs on old and new versions.
if hasattr(data, 'sort_values'):
    by_similarity = data.sort_values(by=['similarity'])
else:
    by_similarity = data.sort(columns=['similarity'])
by_similarity.head()
Out[49]:
molregno smiles similarity mol logp mw
186 1353818 CCCc1nn(C)c2c1nc(-c1cccc(Cl)c1)[nH]c2=O 0.5 Mol 2.9295 302.765
177 453288 CCCc1c2nc(-c3cc(S(=O)(=O)N4CCN(CC)CC4)cnc3OCCOC)[nH]c(=O)c2nn1C 0.5 Mol 1.0225 519.628
178 511950 CCCCN1C(=O)C2=C(N=C(c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)NC2)C1=O 0.5 Mol 0.7942 489.598
179 1353816 CCCc1nn(C)c2c1nc(-c1ccc(Cl)cc1)[nH]c2=O 0.5 Mol 2.9295 302.765
180 326382 CCOc1ccc(S(=O)(=O)N2CCN(C)CC2)cc1-c1nc2c(nc3cccc(CC)n32)c(O)n1 0.5 Mol 2.5473 496.593

Simple plotting using the table columns

In [50]:
# Summary statistics for the two descriptor columns.
data.loc[:, ['mw', 'logp']].describe()
Out[50]:
mw logp
count 187.000000 187.000000
mean 486.180160 2.268858
std 92.688399 0.968545
min 268.320000 -0.048300
25% 448.589000 1.566700
50% 496.593000 2.292500
75% 537.152000 2.902300
max 866.980000 4.731700
In [51]:
# Default size (width, height) for the figures drawn below.
# NOTE(review): `rcParams` is not imported in the visible cells; presumably it
# comes from a pylab-style namespace set up earlier in the notebook.
rcParams['figure.figsize'] = (12, 12)
In [52]:
# Histogram of the calculated logP values.
data['logp'].hist()
Out[52]:
<matplotlib.axes.AxesSubplot at 0x390e490>
In [53]:
# Scatter plot: molecular weight on the x axis against logP on the y axis.
scatter(x=data['mw'], y=data['logp'])
Out[53]:
<matplotlib.collections.PathCollection at 0x422b0d0>
In [54]:
# Inspect the outliers with a molecular weight above 800: select the rows and
# the columns of interest in a single .loc lookup.
data.loc[data['mw'] > 800, ['molregno', 'mol', 'mw', 'logp']]
Out[54]:
molregno mol mw logp
60 487046 Mol 852.997 2.6245
88 487045 Mol 866.980 2.7168

Substructure search within the table

In [55]:
# SMILES of the query fragment used for the substructure search.
qsmi = 'c1nn(C)c2c1nc[nH]c2=O'
In [56]:
# Parse the query SMILES into an RDKit molecule.
qmol = Chem.MolFromSmiles(qsmi)
In [57]:
# Show the query molecule as the cell output.
qmol
Out[57]:
In [58]:
# Comparing the molecule column with `>=` against qmol is the substructure
# test here: the mask is True for rows whose molecule contains the query.
contains_query = data['mol'] >= qmol
subset = data[contains_query]
In [59]:
# (rows, columns) of the substructure-matching subset.
subset.shape
Out[59]:
(90, 6)
In [60]:
# Preview the first rows of the substructure-matching subset.
subset.head()
Out[60]:
molregno smiles similarity mol logp mw
0 410802 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O 1.000000 Mol 1.6109 474.587
1 1351310 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O 0.881356 Mol 3.2395 473.599
2 1351311 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCC3)ccc1OCC)[nH]c2=O 0.881356 Mol 2.8494 459.572
3 80636 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCNCC3)ccc1OCC)[nH]c2=O 0.866667 Mol 1.2687 460.560
4 80694 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(CCO)CC3)ccc1OCC)[nH]c2=O 0.838710 Mol 0.9734 504.613
In [61]:
# Compare summary statistics between rows that contain the query
# substructure (True) and rows that do not (False).
has_substructure = data['mol'] >= qmol
data.groupby(has_substructure).describe().unstack()
Out[61]:
molregno similarity logp mw
count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max count mean std min 25% 50% 75% max
mol
False 97 741276.649485 525340.753036 28710 304703.0 560323.0 1441471.0 1441775 97 0.595045 0.050741 0.5 0.567568 0.600000 0.626866 0.734375 97 2.018649 0.740929 -0.0483 1.482000 2.08120 2.54670 4.2180 97 492.993175 45.491272 391.453 462.61600 490.586 518.64000 624.701
True 90 709987.411111 449869.213609 80558 410694.5 488009.5 1334760.5 1376119 90 0.670030 0.113819 0.5 0.567983 0.691176 0.753623 1.000000 90 2.538528 1.107499 0.3034 1.620075 2.65905 3.32205 4.7317 90 478.837244 124.968717 268.320 386.95875 504.591 558.42875 866.980
In [62]:
# Store the substructure-match result as a boolean column so it can be used
# for grouping and plotting.
data['containsQ'] = data['mol'] >= qmol
In [63]:
# Preview the first two rows, now including the 'containsQ' flag.
data.head(2)
Out[63]:
molregno smiles similarity mol logp mw containsQ
0 410802 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCN(C)CC3)ccc1OCC)[nH]c2=O 1.000000 Mol 1.6109 474.587 True
1 1351310 CCCc1nn(C)c2c1nc(-c1cc(S(=O)(=O)N3CCCCCC3)ccc1OCC)[nH]c2=O 0.881356 Mol 3.2395 473.599 True
In [64]:
# Box plot of the similarity values, one box per value of the containsQ flag.
data.boxplot(column='similarity', by='containsQ')
Out[64]:
<matplotlib.axes.AxesSubplot at 0x393c4d0>
In [65]:
# All queries are done: release the database resources explicitly, closing
# the cursor used above before the connection itself.
cur.close()
conn.close()