#!/usr/bin/env python
# coding: utf-8

# # Visualizing Iris Dataset using Clustergrammer
#
# Notebook-style script: loads the Iris dataset, builds a DataFrame whose
# columns are flower samples (labelled with their flower type) and whose rows
# are the measured features, then displays it as a Clustergrammer widget.

# In[1]:

from sklearn import datasets
import pandas as pd
# The wildcard import supplies both `Network` and `clustergrammer_widget`.
from clustergrammer_widget import *
from copy import deepcopy

net = Network(clustergrammer_widget)


# In[2]:

# import iris data
iris = datasets.load_iris()


# # Make DataFrame from Iris
# First make row and col names, then build the dataframe

# In[3]:

unique_cat_names = iris.target_names.tolist()

# Each column name is a (title, category) tuple: the sample's running index
# plus the flower type looked up from that sample's target value.
cols = [
    ('flowers: flower-' + str(i), 'flower-type: ' + unique_cat_names[inst_col])
    for i, inst_col in enumerate(iris.target)
]


# In[4]:

# One row label per feature name.
rows = ['feature: ' + feature_name for feature_name in iris.feature_names]


# Make a DataFrame with flower-samples as columns and dimensions (e.g. petal
# width) as rows. Column categories (e.g. setosa) have been added to the
# column names using tuples.

# In[5]:

# Transpose so that features become rows and samples become columns.
mat = iris.data.transpose()
df = pd.DataFrame(data=mat, columns=cols, index=rows)
df.shape  # displayed as the cell output when run in a notebook


# # Visualize Iris Dataframe using Clustergrammer
# First assign a color to each flower-type column category. In the final cell
# we Z-score normalize the rows (dimensions) to make them more easily
# comparable.

# In[6]:

net.load_df(df)
net.set_cat_color(axis='col', cat_index=1, cat_name='flower-type: versicolor', inst_color='red')
net.set_cat_color(axis='col', cat_index=1, cat_name='flower-type: virginica', inst_color='blue')
net.set_cat_color(axis='col', cat_index=1, cat_name='flower-type: setosa', inst_color='yellow')


# In[7]:

# NOTE(review): the DataFrame is loaded a second time here, after the category
# colors were set in the previous cell. Presumably redundant -- confirm whether
# reloading affects the colors before removing it.
net.load_df(df)
net.normalize(axis='row', norm_type='zscore', keep_orig=True)
net.cluster()
net.widget()


# We see that flowers still largely cluster according to the categories. We
# also get a smoother breakdown of the flowers into hierarchical clusters
# (toggle the column dendrogram level using the triangle/circle slider).

# In[ ]: