from sklearn import datasets
import pandas as pd
from clustergrammer_widget import *
from copy import deepcopy
# Create the clustergrammer network object that the rest of the script drives.
# NOTE(review): `Network` and `clustergrammer_widget` are presumably provided by
# the star-import from `clustergrammer_widget` above -- confirm.
net = Network(clustergrammer_widget)
# Load the iris dataset via scikit-learn; the code below reads its `data`,
# `target`, `target_names` and `feature_names` attributes.
iris = datasets.load_iris()
First, make the row and column names; then build the DataFrame.
# Category names indexed by the integer class labels stored in `iris.target`.
unique_cat_names = iris.target_names.tolist()

# Column labels: one (name, category) tuple per flower sample. The name embeds
# the sample's position, the category embeds its flower type; the tuple form is
# what attaches the category to the column.
cols = []
for i, inst_col in enumerate(iris.target):
    inst_name = 'flowers: flower-' + str(i)
    inst_cat = 'flower-type: ' + unique_cat_names[inst_col]
    inst_tuple = (inst_name, inst_cat)
    cols.append(inst_tuple)

# Row labels: one plain string per measured feature.
rows = []
for feature_name in iris.feature_names:
    inst_name = 'feature: ' + feature_name
    rows.append(inst_name)
Make a DataFrame with flower samples as columns and dimensions (e.g. petal width) as rows. Column categories (e.g. setosa) have been added to the column names using tuples.
# Flip the sample-by-feature matrix so that features become rows and flower
# samples become columns, matching the `rows` / `cols` labels built earlier.
mat = iris.data.T
df = pd.DataFrame(mat, index=rows, columns=cols)
# Notebook-style sanity check of the resulting dimensions (features, samples).
df.shape
(4, 150)
Below we Z-score normalize the rows (dimensions) to make them more easily comparable.
net.load_df(df)

# Pin a colour for each flower-type category on the column axis
# (cat_index=1 is passed unchanged for every category).
for _cat_name, _cat_color in (
    ('flower-type: versicolor', 'red'),
    ('flower-type: virginica', 'blue'),
    ('flower-type: setosa', 'yellow'),
):
    net.set_cat_color(axis='col', cat_index=1, cat_name=_cat_name, inst_color=_cat_color)

# NOTE(review): `df` is loaded a second time here, exactly as in the original
# sequence; whether this reload is required (or affects the colours set above)
# depends on clustergrammer internals -- confirm before removing.
net.load_df(df)
# Z-score normalize along the row axis before clustering.
net.normalize(axis='row', norm_type='zscore', keep_orig=True)
net.cluster()
# Last expression of the cell: yields the widget for display in the notebook.
net.widget()
Widget Javascript not detected. It may not be installed or enabled properly.
We see that flowers still largely cluster according to their categories. We also get a smoother breakdown of the flowers into hierarchical clusters (toggle the column dendrogram level using the triangle/circle slider).