#!/usr/bin/env python
# coding: utf-8

# # Clustergrammer DataFrame Visualization Example
# This example shows how to visualize a Pandas DataFrame using the [Clustergrammer Jupyter Widget](http://clustergrammer.readthedocs.io/clustergrammer_widget.html).

# In[1]:


import numpy as np
import pandas as pd
from clustergrammer_widget import *


# Generate a dataframe of random data with row and column labels

# In[2]:


# Dimensions of the demo matrix.
num_rows = 500
num_cols = 10

# Seed NumPy's global RNG so the same random matrix is produced on every run.
np.random.seed(seed=100)
mat = np.random.rand(num_rows, num_cols)

# Row and column labels are the positional indices rendered as strings.
rows = list(map(str, range(num_rows)))
cols = list(map(str, range(num_cols)))

# Wrap the matrix in a DataFrame labelled on both axes.
df = pd.DataFrame(mat, index=rows, columns=cols)


# Initialize the network object, load the dataframe, hierarchically cluster the rows and columns using default parameters, and finally visualize using clustergrammer_widget.

# In[3]:


# Initialize the network object, handing clustergrammer_widget to the
# Network constructor. Both names are presumably provided by the
# `from clustergrammer_widget import *` line at the top of the file.
net = Network(clustergrammer_widget)
# Load the DataFrame `df` built in the previous cell into the network object.
net.load_df(df)
# Cluster using default parameters, apart from enrichrgram=False.
# NOTE(review): enrichrgram presumably toggles an Enrichr-related feature --
# confirm against the clustergrammer documentation.
net.cluster(enrichrgram=False)
# Make the visualization. This is the last expression of the notebook cell,
# so its return value is what the notebook is expected to display.
net.widget()


# The network object can also be used to normalize and filter your data. Here we will Z-score normalize the columns and make an updated visualization.

# In[4]:


# Z-score normalize along the column axis of the loaded data.
# NOTE(review): keep_orig=True presumably retains the un-normalized values
# alongside the normalized ones -- confirm against the clustergrammer docs.
net.normalize(axis='col', norm_type='zscore', keep_orig=True)
# Re-cluster so the visualization reflects the normalized data.
net.cluster(enrichrgram=False)
# Make the updated visualization (last expression of the notebook cell).
net.widget()


# Here we will filter rows based on their sum, keeping the top 20 rows ranked by their absolute value sum.

# In[5]:


# Keep only the top 20 rows, ranked by 'sum'.
# NOTE(review): this runs on the same `net` object after the column Z-score
# normalization above, so the ranking presumably uses the normalized values --
# confirm against the clustergrammer docs.
net.filter_N_top('row', 20, 'sum')
# Re-cluster the filtered data.
net.cluster(enrichrgram=False)
# Make the updated visualization (last expression of the notebook cell).
net.widget()


# In[ ]: