import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import LabelBinarizer as skLabelBinarizer
class LabelBinarizer():
    """Minimal one-vs-all label binarizer.

    Each class seen during ``fit`` gets one output column. A sample's row
    holds ``pos_label`` in the column of its class and ``neg_label`` in
    every other column. When exactly two classes were fitted, the output
    is collapsed to a single column describing the second (larger) class.
    """

    def __init__(self, pos_label=1, neg_label=0):
        """Store the values written for matching / non-matching classes."""
        self.pos_label = pos_label
        self.neg_label = neg_label

    def fit(self, y):
        """Record the sorted unique labels of ``y`` in ``classes_``.

        ``y`` may be any 1-D array-like (list, tuple, ndarray).
        Returns ``self`` so calls can be chained.
        """
        self.classes_ = np.unique(np.asarray(y))
        return self

    def transform(self, y):
        """Encode ``y`` as an indicator matrix over the fitted classes.

        ``y`` may be any 1-D array-like. The result has shape
        ``(n_samples, n_classes)``, or ``(n_samples, 1)`` when exactly two
        classes were fitted. Labels not seen during ``fit`` produce a row
        made entirely of ``neg_label``.
        """
        # Coerce to an ndarray: plain lists have no ``.shape`` and compare
        # to a scalar as a whole rather than element-wise.
        y = np.asarray(y)
        y_enc = np.full((y.shape[0], len(self.classes_)), self.neg_label)
        for i, c in enumerate(self.classes_):
            y_enc[y == c, i] = self.pos_label
        if len(self.classes_) == 2:
            # Binary problems need only one column: the second class.
            y_enc = y_enc[:, 1].reshape(-1, 1)
        return y_enc
iris = load_iris()

# Target vectors to run through both implementations. The flag marks the
# two-class case, whose output must collapse to a single column.
cases = (
    (iris.target[iris.target != 2], True),     # binary
    (iris.target, False),                      # numeric multiclass
    (iris.target_names[iris.target], False),   # string multiclass
)

for y, is_binary in cases:
    le1 = LabelBinarizer().fit(y)
    le2 = skLabelBinarizer().fit(y)
    # Both binarizers must discover the same classes in the same order.
    assert np.array_equal(le1.classes_, le2.classes_)
    yt1 = le1.transform(y)
    yt2 = le2.transform(y)
    if is_binary:
        assert yt1.shape == (len(y), 1)
    # The encoded output must match the reference implementation.
    assert np.array_equal(yt1, yt2)