import numpy as np
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import Binarizer as skBinarizer
class Binarizer():
    """Map numeric features to 0/1 by comparing them against a threshold.

    Values strictly greater than ``threshold`` become 1; all other values
    become 0. With the default threshold of 0 this is exactly "positive
    entries become 1".

    Parameters
    ----------
    threshold : float, default=0.0
        Cut-off value. Entries equal to the threshold map to 0.
    """

    def __init__(self, threshold: float = 0.0):
        self.threshold = threshold

    def fit(self, X):
        """Do nothing and return ``self``.

        The transformer is stateless; ``X`` is accepted only so the object
        can be used with the usual ``fit(X).transform(X)`` call chain.
        """
        return self

    def transform(self, X):
        """Return a binarized copy of ``X``.

        Parameters
        ----------
        X : array-like
            Numeric data. Anything ``np.asarray`` accepts (ndarray, nested
            lists, ...) is allowed; the input itself is never modified.

        Returns
        -------
        numpy.ndarray
            Array with the same shape and dtype as ``np.asarray(X)``,
            holding 1 where ``X > threshold`` and 0 elsewhere.
        """
        # Coerce first so that plain Python lists support the elementwise
        # comparison below (a list compared to a number raises TypeError).
        X = np.asarray(X)
        Xt = np.zeros_like(X)
        Xt[X > self.threshold] = 1
        return Xt
# Driver: run the local Binarizer and scikit-learn's Binarizer on the same
# dense count matrix.
# NOTE(review): Xt1 and Xt2 are presumably meant to be compared, but nothing
# in this section checks them against each other.

# Keep only the first 1000 documents of the fetched dataset.
data = fetch_20newsgroups().data
data = data[:1000]

# Vectorize the documents, then convert the result to a dense array.
X = (
    CountVectorizer()
    .fit_transform(data)
    .toarray()
)

# Fit both transformers on the same matrix, then transform it with each.
trans1, trans2 = Binarizer().fit(X), skBinarizer().fit(X)
Xt1, Xt2 = trans1.transform(X), trans2.transform(X)