#!/usr/bin/env python
# coding: utf-8
"""Multiple correspondence analysis (MCA) of the cookie-classifier data matrix.

Exported from a Jupyter notebook; the ``# In[n]:`` markers are the original
cell boundaries.  The script:

1. loads ``./cookieclassifier_data_matrix.tsv`` and treats its first column
   as the class label,
2. fits MCA models on the remaining columns with the ``mca`` package,
3. tabulates factor scores, squared cosines and contributions (x 1000) for
   the rows (``table3``) and for the columns (``table4``),
4. scatter-plots the rows on the first two factors.

Bare expressions such as ``table3`` render the object when run as notebook
cells and are no-ops when the file is run as a plain script.
"""

# In[11]:

try:
    # Notebook-only magic: pulls in pylab and enables inline figures.
    get_ipython().run_line_magic('pylab', 'inline')
except NameError:
    # ``get_ipython`` only exists under IPython/Jupyter; a plain interpreter
    # skips the magic and relies on the explicit imports below.
    pass

import matplotlib.pyplot as plt
import mca
import pandas as pd

# Number of leading factors kept in the summary tables and in the plot.
N_FACTORS = 2
# Only points whose factor-2 score exceeds this value get a text label.
LABEL_MIN_FACTOR_2 = 3

# In[12]:

data = pd.read_table('./cookieclassifier_data_matrix.tsv',
                     sep='\t', header=0)

# In[13]:

# Rename the first column to 'category'; all other column names are kept.
data.columns = ['category'] + data.columns.tolist()[1:]

# In[14]:

# Convert the label column to a categorical and relabel its levels 1, 2, 3.
# NOTE(review): rename_categories with a 3-item list assumes the column has
# exactly three distinct values - confirm against the data file.
data['category'] = data.category.astype('category')
data['category'] = data.category.cat.rename_categories([1, 2, 3])

# In[15]:

data

# Matrix handed to MCA: every column except the class label.
X = data.drop('category', axis=1)

# NOTE(review): presumably the default MCA() applies the Benzecri correction
# (hence the name) and ``benzecri=False`` turns it off - confirm in the mca
# docs.  ``mca_ind`` is not used again in this script.
mca_ben = mca.MCA(X)
mca_ind = mca.MCA(X, benzecri=False)

# In[16]:

mca_ben

# In[18]:

fs, cos, cont = 'Factor score', 'Squared cosines', 'Contributions x 1000'

# Row summary: one column per row of X, indexed by (statistic, factor number).
table3 = pd.DataFrame(
    columns=X.index,
    index=pd.MultiIndex.from_product(
        [[fs, cos, cont], range(1, N_FACTORS + 1)]),
)
# Each mca result is transposed so that factors run down the table rows.
table3.loc[fs, :] = mca_ben.fs_r(N=N_FACTORS).T
table3.loc[cos, :] = mca_ben.cos_r(N=N_FACTORS).T
table3.loc[cont, :] = mca_ben.cont_r(N=N_FACTORS).T * 1000

# In[19]:

table3

# In[28]:

# Factor scores of the rows: points[0] is factor 1, points[1] is factor 2.
points = table3.loc[fs].values
labels = table3.columns.values
factor_1, factor_2 = points[0], points[1]

# NOTE(review): this palette is defined but never used; every point is drawn
# in red below.
colors = ['#66c2a5', '#fc8d62', '#8da0cb']

plt.figure()
plt.margins(0.1)
# Grey axes through the origin.
plt.axhline(0, color='gray')
plt.axvline(0, color='gray')
plt.xlabel('Factor 1')
plt.ylabel('Factor 2')
plt.scatter(factor_1, factor_2, s=20, marker='o', c='r', alpha=.5,
            linewidths=0)
for label, x, y in zip(labels, factor_1, factor_2):
    # Label only the points that lie high on factor 2.
    if y > LABEL_MIN_FACTOR_2:
        plt.annotate(label, xy=(x, y), xytext=(x + .03, y + .03))
plt.show()

# In[26]:

# Column summary: same layout as table3, with one column per column of X.
table4 = pd.DataFrame(
    columns=X.columns,
    index=pd.MultiIndex.from_product(
        [[fs, cos, cont], range(1, N_FACTORS + 1)]),
)
table4.loc[fs, :] = mca_ben.fs_c(N=N_FACTORS).T
table4.loc[cos, :] = mca_ben.cos_c(N=N_FACTORS).T
table4.loc[cont, :] = mca_ben.cont_c(N=N_FACTORS).T * 1000

# In[27]:

table4

# In[ ]: