#!/usr/bin/env python
# coding: utf-8
"""Compute every RDKit descriptor for the CDK2 example set and profile it.

Exported notebook script.  It loads ``Book/data/cdk2.sdf`` from the RDKit
documentation directory, evaluates all descriptors listed in
``rdkit.Chem.Descriptors._descList`` for each molecule, collects the values
in a pandas ``DataFrame`` (one row per molecule, one column per descriptor)
and builds a ``pandas_profiling`` report from that frame.
"""

# In[1]:

import os

import pandas
import pandas_profiling
import pandas as pd

# In[2]:

from rdkit import Chem
from rdkit import RDConfig
from rdkit.Chem import rdBase
from rdkit.Chem import Descriptors
from rdkit.Chem.Descriptors import _descList
from rdkit.ML.Descriptors import MoleculeDescriptors

# In[3]:

print(rdBase.rdkitVersion)
# NOTE: despite its name, ``datadir`` is the path of the SDF *file* itself.
datadir = os.path.join(RDConfig.RDDocsDir, "Book/data/cdk2.sdf")

# In[4]:

# Drop entries the supplier returns as None (presumably records RDKit could
# not parse) so that every element passed to the calculator is a molecule.
mols = [mol for mol in Chem.SDMolSupplier(datadir) if mol is not None]

# In[5]:

# ``_descList`` holds tuples whose first element is the descriptor name.
desc_name = [desc[0] for desc in _descList]
calc = MoleculeDescriptors.MolecularDescriptorCalculator(desc_name)
# One sequence of descriptor values per molecule, indexed like ``desc_name``.
descs_list = [calc.CalcDescriptors(mol) for mol in mols]

# In[6]:

print(len(descs_list))
print(len(descs_list[0]))

# In[7]:

# Transpose the per-molecule rows into per-descriptor columns:
# ``data[name]`` lists that descriptor's value for every molecule, in order.
data = {
    name: [descs[i] for descs in descs_list]
    for i, name in enumerate(desc_name)
}

# In[9]:

df = pd.DataFrame(data)

# In[10]:

print(df.shape)

# In[11]:

# Keep a reference to the report: outside a notebook a bare expression is
# evaluated and thrown away, so nothing could be rendered or saved from it.
profile = pandas_profiling.ProfileReport(df)

# In[ ]: