import pandas as pd import numpy as np %load_ext rmagic %%R -o x,y set.seed(1234); par(mar=c(0,0,0,0)) x <- rnorm(12,mean=rep(1:3,each=4),sd=0.2) y <- rnorm(12,mean=rep(c(1,2,1),each=4),sd=0.2) plot(x,y,col='blue',pch=19,cex=2) text(x+0.05,y+0.05,labels=as.character(0:11)) dataFrame = pd.DataFrame(zip(x, y), columns=['x', 'y']) # compute distance matrix from scipy.spatial.distance import pdist, squareform # not printed as pretty, but the values are correct distxy = squareform(pdist(dataFrame, metric='euclidean')) print distxy # perform clustering and plot the dendrogram from scipy.cluster.hierarchy import linkage, dendrogram R = dendrogram(linkage(distxy, method='complete')) xlabel('points') ylabel('Height') suptitle('Cluster Dendrogram', fontweight='bold', fontsize=14); %%R -i x,y -o dataMatrix dataFrame <- data.frame(x=x,y=y) set.seed(143) dataMatrix <- as.matrix(dataFrame)[sample(1:12),] # a simple function to compute hierarchical cluster on both rows and columns, and plot heatmaps def heatmap(dm): from scipy.cluster.hierarchy import linkage, dendrogram from scipy.spatial.distance import pdist, squareform D1 = squareform(pdist(dm, metric='euclidean')) D2 = squareform(pdist(dm.T, metric='euclidean')) f = figure(figsize=(8, 8)) # add first dendrogram ax1 = f.add_axes([0.09, 0.1, 0.2, 0.6]) Y = linkage(D1, method='complete') Z1 = dendrogram(Y, orientation='right') ax1.set_xticks([]) ax1.set_yticks([]) # add second dendrogram ax2 = f.add_axes([0.3, 0.71, 0.6, 0.2]) Y = linkage(D2, method='complete') Z2 = dendrogram(Y) ax2.set_xticks([]) ax2.set_yticks([]) # add matrix plot axmatrix = f.add_axes([0.3, 0.1, 0.6, 0.6]) idx1 = Z1['leaves'] idx2 = Z2['leaves'] D = D1[idx1, :] D = D[:, idx2] im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap='hot') axmatrix.set_xticks([]) axmatrix.set_yticks([]) return {'ordered' : D, 'rorder' : Z1['leaves'], 'corder' : Z2['leaves']} heatmap(dataMatrix)