# Nbviewer: https://nbviewer.jupyter.org/github/shaundsouza/deep-learning/blob/master/ig.ipynb
# Execute on Binder: https://mybinder.org/v2/gh/shaundsouza/ai-ecosystems-enabling/master?filepath=ig.ipynb
import numpy as np
import pandas as pd
def B(q):
    """Return the entropy, in bits, of a Boolean variable true with probability ``q``.

    Computes ``-(q * log2(q) + (1 - q) * log2(1 - q))``.

    Args:
        q: Probability of the positive outcome.

    Returns:
        The binary entropy in bits; ``0`` when ``q`` is exactly 0 or 1.
    """
    if q == 0 or q == 1:
        # A certain outcome carries no information; returning early also
        # avoids evaluating log2(0).
        return 0
    # Both terms must use the same base (2) so the result is in bits and is
    # symmetric: B(q) == B(1 - q).
    return -(q * np.log2(q) + (1 - q) * np.log2(1 - q))
def H(q):
    """Return the Shannon entropy, in bits, of the values in ``q``.

    Each distinct value is treated as one category whose probability is its
    relative frequency in ``q``. Missing values (NaN) are counted as a
    category of their own.

    Args:
        q: A pandas Series, NumPy array or other sequence of hashable values.

    Returns:
        ``-sum(p * log2(p))`` over the observed categories; ``0`` for an
        empty input or when all values are equal.
    """
    # dropna=False keeps missing values as a category. Comparing with ``==``
    # never matches NaN, which previously produced p == 0 and a NaN result.
    counts = pd.Series(q).value_counts(dropna=False)
    # Categorical data reports unobserved categories with a zero count; drop
    # them so that log2(0) is never evaluated.
    counts = counts[counts > 0]
    probabilities = counts / len(q)
    return -(probabilities * np.log2(probabilities)).sum()
def remainder(q, t):
    """Return the expected binary entropy of ``t`` after splitting on ``q``.

    The rows are partitioned by the distinct values of ``q``. For every
    partition the fraction of targets equal to the literal label ``"Yes"``
    is passed to ``B``, and the results are averaged weighted by partition
    size. Rows whose ``q`` value is missing (NaN) form one partition.

    Args:
        q: Attribute values, one per row (pandas Series or NumPy array).
        t: Target labels aligned with ``q``; the positive class is ``"Yes"``.

    Returns:
        The weighted sum of per-partition binary entropies; ``0`` when ``q``
        is empty.
    """
    total = len(q)
    missing = pd.isna(q)
    expected = 0
    for value in pd.unique(q):
        if pd.isna(value):
            # ``q == NaN`` matches nothing, which used to give an empty
            # partition and a 0 / 0 division; missing rows are added below.
            continue
        members = q == value
        group_size = members.sum()
        positives = (t[members] == "Yes").sum()
        expected = expected + group_size / total * B(positives / group_size)
    if missing.any():
        group_size = missing.sum()
        positives = (t[missing] == "Yes").sum()
        expected = expected + group_size / total * B(positives / group_size)
    return expected
def remainderH(q, t):
    """Return the expected entropy of ``t`` after splitting on ``q``.

    The rows are partitioned by the distinct values of ``q``. The entropy
    ``H`` of the targets inside each partition is averaged, weighted by the
    partition's share of all rows. Rows whose ``q`` value is missing (NaN)
    form one partition, so the weights always add up to 1.

    Args:
        q: Attribute values, one per row (pandas Series or NumPy array).
        t: Target values aligned with ``q``.

    Returns:
        The weighted sum of per-partition entropies; ``0`` when ``q`` is
        empty.
    """
    total = len(q)
    missing = pd.isna(q)
    expected = 0
    for value in pd.unique(q):
        if pd.isna(value):
            # ``q == NaN`` matches nothing, so these rows used to be dropped
            # from the sum without notice; they are added once below.
            continue
        members = q == value
        expected = expected + members.sum() / total * H(t[members])
    if missing.any():
        expected = expected + missing.sum() / total * H(t[missing])
    return expected
def gain(q, t):
    """Return the information gain of attribute ``q`` for a ``"Yes"``/other target.

    This is the binary entropy ``B`` of the overall share of ``"Yes"`` labels
    in ``t`` minus ``remainder(q, t)``.
    """
    positive_share = sum(t == "Yes") / len(t)
    return B(positive_share) - remainder(q, t)
def gainH(q, t):
    """Return the information gain of attribute ``q`` with respect to ``t``.

    This is the entropy ``H`` of ``t`` on its own minus the expected entropy
    that remains after splitting on ``q`` (``remainderH``).
    """
    entropy_before = H(t)
    entropy_after = remainderH(q, t)
    return entropy_before - entropy_after
# Load the training table and replace every missing cell with 0 so that each
# cell compares equal to itself in the entropy helpers.
dataset = pd.read_csv("house/train.csv").fillna(0)
size = dataset.shape
names = dataset.columns.values

# Print the information gain for every ordered pair of columns (including a
# column paired with itself): the first column is the splitting attribute and
# the second is the target.
for i, attribute_name in enumerate(names):
    attribute = dataset.iloc[:, i]
    for j, target_name in enumerate(names):
        print(attribute_name, target_name, gainH(attribute, dataset.iloc[:, j]))