#!/usr/bin/env python
# coding: utf-8
"""Notebook export: pyjanitor chemistry helpers applied to RDKit's cdk2.sdf.

Steps, in order:
  1. Load ``Book/data/cdk2.sdf`` from the RDKit docs directory into a DataFrame.
  2. Compute 512-bit, radius-2 Morgan fingerprints as bits and as counts.
  3. Project the count fingerprints onto 3 principal components and scatter
     the first two, coloured by the ``Cluster`` column.
  4. Add ``SMILES``, ``newROMol`` and ``NumAtm`` columns to the DataFrame.

Bare expressions such as ``df.head(2)`` are kept from the notebook; they only
display something when run cell-by-cell in an interactive session.
"""

# The inline backend magic only exists under IPython/Jupyter. When the file is
# run as a plain script, ``get_ipython`` is undefined, so skip it and let
# matplotlib pick its default backend.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

import matplotlib.pyplot as plt
import os
from rdkit import Chem
from rdkit import RDConfig
import pandas as pd
import janitor  # imported alongside janitor.chemistry; provides df.add_column (see below)
from janitor import chemistry
from rdkit.Chem import PandasTools
from rdkit.Chem.Draw import IPythonConsole  # unused by name; kept from the notebook
from sklearn.decomposition import PCA

plt.style.use('ggplot')

# --- Load the example SD file from the RDKit docs directory ------------------
path = os.path.join(RDConfig.RDDocsDir, 'Book/data/cdk2.sdf')
df = PandasTools.LoadSDF(path)
df.head(2)

# --- Morgan fingerprints (radius 2, 512 bits) as a bit vector ----------------
fp1 = chemistry.morgan_fingerprint(
    df, mols_col='ROMol', radius=2, nbits=512, kind='bits'
)
type(fp1)
fp1.head(2)
fp1.shape

# --- Same fingerprint settings, but as counts --------------------------------
fp2 = chemistry.morgan_fingerprint(
    df, mols_col='ROMol', radius=2, nbits=512, kind='counts'
)
fp2.shape

# --- PCA on the count fingerprints; plot the first two components ------------
pca = PCA(n_components=3)
pca_res = pca.fit_transform(fp2)
# NOTE(review): assumes df.Cluster holds values matplotlib accepts for ``c``
# (numeric or valid colours) - confirm the dtype LoadSDF produces.
plt.scatter(pca_res[:, 0], pca_res[:, 1], c=df.Cluster)

# --- Round-trip: molecules -> SMILES -> molecules ----------------------------
df['SMILES'] = df.ROMol.apply(Chem.MolToSmiles)
df.head(2)

df = chemistry.smiles2mol(
    df, smiles_col='SMILES', mols_col='newROMol', progressbar='notebook'
)
df.head(2)

# --- Atom count per molecule -------------------------------------------------
# add_column is not a native pandas method; it is provided by pyjanitor.
df = df.add_column('NumAtm', [mol.GetNumAtoms() for mol in df.ROMol])
df.head(2)