#!/usr/bin/env python
# coding: utf-8

# # Visualizing Iris Dataset using Clustergrammer

# In[1]:


from sklearn import datasets
import pandas as pd
from clustergrammer_widget import *
from copy import deepcopy
# Create the Network object that every later cell operates on. It is handed
# the clustergrammer_widget object pulled in by the star import above
# (presumably so that net.widget() can render it -- TODO confirm).
net = Network(clustergrammer_widget)


# In[2]:


# Load the Iris dataset from scikit-learn. Later cells read its `data`,
# `target`, `target_names` and `feature_names` attributes.
iris = datasets.load_iris()


# # Make DataFrame from Iris
# First make row and col names, then build the dataframe

# In[3]:


# Build one (name, category) tuple per flower sample. The tuples are used as
# the DataFrame column labels, so each column carries its flower type as a
# category next to its unique name.
unique_cat_names = iris.target_names.tolist()
cols = [
    # Each target value is an index into unique_cat_names.
    ('flowers: flower-' + str(i), 'flower-type: ' + unique_cat_names[inst_col])
    for i, inst_col in enumerate(iris.target)
]


# In[4]:


# One row label per measured feature, prefixed with the 'feature: ' title.
rows = ['feature: ' + feature_name for feature_name in iris.feature_names]


# Make a DataFrame with flower-samples as columns and dimensions (e.g. petal width) as rows. Column categories (e.g. setosa) have been added to the column names using tuples.

# In[5]:


# Flip the data matrix so that features become rows and flower samples
# become columns, matching the `rows` / `cols` labels built above.
mat = iris.data.T
df = pd.DataFrame(mat, index=rows, columns=cols)
# Bare expression: shows the (n_features, n_samples) shape as the cell output.
df.shape


# # Visualize Iris Dataframe using Clustergrammer

# Below we first assign a color to each flower-type category, then Z-score normalize the rows (dimensions) to make them more easily comparable.

# In[6]:


# Load the DataFrame into the network, then assign a fixed color to each
# flower-type column category.
net.load_df(df)
flower_type_colors = (
    ('flower-type: versicolor', 'red'),
    ('flower-type: virginica', 'blue'),
    ('flower-type: setosa', 'yellow'),
)
for flower_type, color in flower_type_colors:
    net.set_cat_color(axis='col', cat_index=1, cat_name=flower_type, inst_color=color)


# In[7]:


# NOTE(review): df was already loaded in the previous cell, before the
# category colors were set; it is loaded again here -- confirm that the
# colors survive the reload and whether this second call is needed.
net.load_df(df)
# Z-score normalize along the row axis (one row per feature) so features with
# different scales are comparable. keep_orig=True presumably retains the
# un-normalized values as well -- TODO confirm against the library docs.
net.normalize(axis='row', norm_type='zscore', keep_orig=True)
# Cluster the loaded (normalized) matrix using the library defaults.
net.cluster()
# Last expression of the cell; in a notebook this presumably displays the
# interactive widget as the cell output.
net.widget()


# We see that flowers still largely cluster according to the categories. We also get a smoother breakdown of the flowers into hierarchical clusters (toggle the column dendrogram level using the triangle/circle slider).

# In[ ]: