Imports

In [51]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib as mpl
from IPython.display import display, HTML
import matplotlib.pyplot as plt
In [52]:
# Emit an HTML snippet containing a script and a submit button. The script's
# code_toggle() hides or shows every element matching the jQuery selector
# `div.input` and flips the `code_show` flag on each call; it is also registered
# to run once when the document is ready, and the button calls it again on click.
# NOTE(review): this relies on `$` (jQuery) being available in the rendering
# page — presumably only the classic Notebook front end; confirm before relying
# on it elsewhere.
HTML('''<script>
code_show=true; 
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[52]:

Loading data

In [72]:
# CSV location, given relative to the kernel's working directory.
# NOTE(review): the path climbs out to a Desktop folder, so this cell only runs
# on a machine with the same directory layout — consider a configurable data dir.
path = '../../../Desktop/IndiaEuroopeFstMatrix.csv'
# index_col=0: the first CSV column becomes the row index (the population
# labels seen in the displayed output).
data = pd.read_csv(path, index_col=0)

Describing data

In [73]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
data.describe()
Out[73]:
Arabian Armenia Balochi Finn French_Basque German GreatBritain Greek Gujar Iranians ... Pathan Ror Russian Sardinian Sicily Sindhi SouthIndianBrahmin Sweden Ukrainian Velamas
count 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 ... 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000
mean 0.027691 0.016877 0.016081 0.022387 0.026139 0.016967 0.018880 0.016941 0.017297 0.013983 ... 0.012702 0.014647 0.018529 0.028212 0.016577 0.014891 0.018854 0.019430 0.018407 0.028085
std 0.012694 0.010797 0.008601 0.013172 0.015912 0.013317 0.014172 0.013114 0.012015 0.007964 ... 0.008628 0.008389 0.012769 0.016944 0.012912 0.010864 0.014356 0.014352 0.013452 0.017732
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ... -0.002255 0.000000 0.000000 0.000000 0.000000 -0.001441 0.000000 0.000000 0.000000 0.000000
25% 0.022636 0.011444 0.008380 0.013001 0.012119 0.004905 0.006305 0.006448 0.005763 0.007930 ... 0.004965 0.007267 0.008082 0.014581 0.007022 0.004335 0.005800 0.006741 0.006433 0.011498
50% 0.027348 0.014689 0.017663 0.022216 0.027348 0.016246 0.018219 0.014379 0.014365 0.013203 ... 0.014098 0.016325 0.017964 0.022216 0.013708 0.011532 0.017565 0.018712 0.017675 0.028377
75% 0.033940 0.021246 0.022004 0.030397 0.036395 0.024047 0.026683 0.024672 0.025846 0.017872 ... 0.017230 0.018279 0.026222 0.040161 0.024135 0.022793 0.029864 0.027291 0.025689 0.042759
max 0.052244 0.041692 0.032694 0.047261 0.057338 0.044564 0.047698 0.045260 0.040746 0.032079 ... 0.031138 0.033152 0.043997 0.060416 0.044832 0.036401 0.045192 0.048118 0.045770 0.057485

8 rows × 27 columns

In [74]:
# Preview the first five rows of the loaded matrix.
data.head()
Out[74]:
Arabian Armenia Balochi Finn French_Basque German GreatBritain Greek Gujar Iranians ... Pathan Ror Russian Sardinian Sicily Sindhi SouthIndianBrahmin Sweden Ukrainian Velamas
Arabian 0.000000 0.010957 0.025432 0.032965 0.027348 0.023213 0.024507 0.014379 0.033982 0.011750 ... 0.026316 0.030182 0.027716 0.022058 0.011766 0.030051 0.037871 0.026409 0.025065 0.048797
Armenia 0.010957 0.000000 0.012653 0.019903 0.017114 0.010802 0.011932 0.004274 0.021215 0.002567 ... 0.012541 0.016303 0.014689 0.014394 0.002779 0.017021 0.025402 0.013863 0.012502 0.037053
Balochi 0.025432 0.012653 0.000000 0.026459 0.030516 0.019401 0.022428 0.017663 0.008419 0.008204 ... 0.005354 0.008134 0.021580 0.032694 0.016834 0.006156 0.010808 0.022637 0.021003 0.019953
Finn 0.032965 0.019903 0.026459 0.000000 0.016536 0.004850 0.006830 0.012294 0.029607 0.019570 ... 0.021278 0.020519 0.004338 0.022216 0.013708 0.026759 0.032961 0.004648 0.006050 0.045752
French_Basque 0.027348 0.017114 0.030516 0.016536 0.000000 0.009033 0.008326 0.010193 0.037218 0.019364 ... 0.027649 0.028534 0.012767 0.013025 0.009978 0.033123 0.041682 0.010163 0.011471 0.054804

5 rows × 27 columns

Normalizing

In [75]:
# Min-max scale the whole matrix in place using ONE global minimum and maximum
# (not per column), so every entry ends up in [0, 1].
# Step 1: shift so the smallest entry of the matrix becomes 0.
data -= data.min().min()
# Step 2: divide by the largest shifted entry, i.e. the original value range
# (max - min), so the largest entry becomes 1.
# Note: because the raw matrix contains slightly negative values, entries that
# were exactly 0 (e.g. the diagonal) are no longer 0 after the shift.
data /= data.max().max()
# Summary statistics of the rescaled matrix (displayed as the cell output).
data.describe()
Out[75]:
Arabian Armenia Balochi Finn French_Basque German GreatBritain Greek Gujar Iranians ... Pathan Ror Russian Sardinian Sicily Sindhi SouthIndianBrahmin Sweden Ukrainian Velamas
count 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 ... 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000 27.000000
mean 0.477827 0.305274 0.292580 0.393201 0.453058 0.306703 0.337241 0.306304 0.311973 0.259102 ... 0.238651 0.269685 0.331634 0.486132 0.300485 0.273591 0.336817 0.346014 0.329680 0.484113
std 0.202541 0.172283 0.137238 0.210175 0.253899 0.212492 0.226132 0.209246 0.191710 0.127073 ... 0.137670 0.133849 0.203739 0.270367 0.206022 0.173351 0.229062 0.229000 0.214639 0.282936
min 0.035981 0.035981 0.035981 0.035981 0.035981 0.035981 0.035981 0.035981 0.035981 0.035981 ... 0.000000 0.035981 0.035981 0.035981 0.035981 0.012985 0.035981 0.035981 0.035981 0.035981
25% 0.397160 0.218591 0.169699 0.243425 0.229350 0.114253 0.136580 0.138868 0.127935 0.162520 ... 0.115200 0.151928 0.164941 0.268637 0.148027 0.105148 0.128532 0.143546 0.138623 0.219441
50% 0.472344 0.270367 0.317821 0.390468 0.472344 0.295212 0.326688 0.265416 0.265196 0.246645 ... 0.260938 0.296462 0.322621 0.390468 0.254709 0.219987 0.316246 0.334559 0.318012 0.488773
75% 0.577531 0.374992 0.387079 0.521001 0.616711 0.419675 0.461733 0.429647 0.448386 0.321145 ... 0.310910 0.327638 0.454390 0.676805 0.421079 0.399675 0.512504 0.471443 0.445880 0.718258
max 0.869604 0.701229 0.557659 0.790089 0.950882 0.747060 0.797057 0.758164 0.686136 0.547833 ... 0.532820 0.564969 0.738000 1.000000 0.751331 0.616803 0.757079 0.803757 0.766292 0.953219

8 rows × 27 columns

Plotting

In [76]:
# Keep only the lower triangle (diagonal included) of the matrix; the masked
# upper-triangle cells become NaN and are left blank by the heatmap.
# `bool` replaces the `np.bool` alias, which was deprecated in NumPy 1.20 and
# removed in NumPy 1.24.
lower_mask = np.tril(np.ones(data.shape)).astype(bool)
data_lt = data.where(lower_mask)

# Default figure size (inches) for the figure created by the heatmap call.
mpl.rcParams['figure.figsize'] = (20, 15)
ax = sb.heatmap(data_lt, cmap="Spectral", square=True, linewidths=.5)

n_rows, n_cols = data_lt.shape

# Pin the y-limits to the full extent of the matrix (inverted axis, first row on
# top). This replaces the "+0.5 / -0.5" nudge of the current limits, which only
# compensated for the Matplotlib 3.1.1 regression that clipped the first and
# last heatmap rows and adds stray half-cell padding on unaffected versions.
ax.set_ylim(n_rows, 0)

# Draw cell-boundary grid lines. The positions are derived from the matrix shape
# instead of a hard-coded count, and the vertical lines span the y-limits
# (previously they were given the x-limits).
ax.hlines(range(n_rows + 1), *ax.get_xlim())
ax.vlines(range(n_cols + 1), *ax.get_ylim())

# Horizontal row labels, vertical column labels.
ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
ax.set_xticklabels(ax.get_xticklabels(), rotation=90)
plt.show()
In [ ]: