import numpy as np
from sklearn.datasets import load_iris
from sklearn.preprocessing import LabelEncoder as skLabelEncoder
class LabelEncoder():
    """Encode labels as integer codes in ``0 .. n_classes - 1``.

    Minimal NumPy implementation of the ``fit`` / ``transform`` pair.

    After ``fit`` the instance has:
        classes_: sorted array of the unique labels passed to ``fit``.
    """

    def fit(self, y):
        """Record the sorted unique labels found in *y*.

        Args:
            y: array-like of labels.

        Returns:
            This encoder, so calls can be chained.
        """
        self.classes_ = np.unique(y)
        return self

    def transform(self, y):
        """Map each label in *y* to its index in ``classes_``.

        Args:
            y: array-like of labels, all of which must have been seen by
                ``fit``.

        Returns:
            The result of ``np.searchsorted(self.classes_, y)``: for every
            label, its position in the sorted ``classes_`` array.

        Raises:
            ValueError: if *y* contains a label that is not in ``classes_``.
        """
        labels = np.asarray(y)
        codes = np.searchsorted(self.classes_, labels)

        # searchsorted only yields insertion points, so an unknown label
        # would silently receive the code of a neighbouring class (or the
        # out-of-range code n_classes). Verify every label really is there.
        flat_labels = labels.ravel()
        if flat_labels.size:
            n_classes = len(self.classes_)
            if n_classes == 0:
                unseen = np.ones(flat_labels.shape, dtype=bool)
            else:
                # Clamp so an insertion point past the end is still a valid
                # index for the comparison below.
                nearest = self.classes_[
                    np.minimum(np.ravel(codes), n_classes - 1)
                ]
                # NaN never compares equal to itself; a NaN label that
                # landed on a NaN class counts as a match.
                both_nan = (nearest != nearest) & (flat_labels != flat_labels)
                unseen = (nearest != flat_labels) & ~both_nan
            if unseen.any():
                unknown = np.unique(flat_labels[unseen]).tolist()
                raise ValueError(
                    f"y contains previously unseen labels: {unknown!r}"
                )
        return codes
# Compare the minimal LabelEncoder with scikit-learn's on the iris targets.
iris = load_iris()

# First pass: numeric multiclass labels; second pass: string multiclass labels.
for y in (iris.target, iris.target_names[iris.target]):
    le1, le2 = LabelEncoder().fit(y), skLabelEncoder().fit(y)
    # Both encoders must learn the same class set ...
    assert np.array_equal(le1.classes_, le2.classes_)
    yt1, yt2 = le1.transform(y), le2.transform(y)
    # ... and produce the same integer codes.
    assert np.array_equal(yt1, yt2)