import sklearn
from sklearn import preprocessing, decomposition, linear_model, pipeline, metrics
import sklearn_pandas
import pandas as pd
import numpy as np
## Build a small example DataFrame: one categorical column ('pet') and
## two numeric columns ('children' as floats, 'salary' as integers).
data = pd.DataFrame(
    {
        'pet': ['cat', 'dog', 'dog', 'fish', 'cat', 'dog', 'cat', 'fish'],
        'children': [4.0, 6.0, 3.0, 3.0, 2.0, 3.0, 5.0, 4.0],
        'salary': [90, 24, 44, 27, 32, 59, 36, 27],
    }
)
data
/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated. warnings.warn(d.msg, DeprecationWarning) /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated. warnings.warn(d.msg, DeprecationWarning)
| | children | pet | salary |
---|---|---|---|
0 | 4 | cat | 90 |
1 | 6 | dog | 24 |
2 | 3 | dog | 44 |
3 | 3 | fish | 27 |
4 | 2 | cat | 32 |
5 | 3 | dog | 59 |
6 | 5 | cat | 36 |
7 | 4 | fish | 27 |
## Try to binarize the 'pet' labels on their own, outside of any mapper.
## NOTE: in the run recorded in this notebook the fit_transform call below
## fails with "ValueError: Unknown label type" (see the traceback that follows).
pet_labels = np.asarray(data.pet)
print(pet_labels)
lb = preprocessing.LabelBinarizer()
## Alternative that was tried and left disabled: lb.fit(list(data.pet))
lb.fit_transform(pet_labels)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-23-744549e98927> in <module>() 2 #lb.fit(list(data.pet)) 3 print np.asarray(data.pet) ----> 4 lb.fit_transform(np.asarray(data.pet)) /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/base.pyc in fit_transform(self, X, y, **fit_params) 406 if y is None: 407 # fit method of arity 1 (unsupervised transformation) --> 408 return self.fit(X, **fit_params).transform(X) 409 else: 410 # fit method of arity 2 (supervised transformation) /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/preprocessing/label.pyc in fit(self, y) 239 self.indicator_matrix_ = y_type == 'multilabel-indicator' 240 --> 241 self.classes_ = unique_labels(y) 242 243 return self /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/utils/multiclass.pyc in unique_labels(*ys) 96 _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None) 97 if not _unique_labels: ---> 98 raise ValueError("Unknown label type") 99 100 ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys)) ValueError: Unknown label type
['cat' 'dog' 'dog' 'fish' 'cat' 'dog' 'cat' 'fish']
## Ask scikit-learn how it classifies the same labels when they are held in
## different containers, to explain the "Unknown label type" error above.
from sklearn.utils.multiclass import type_of_target

print(type_of_target(list(data.pet)))        # plain Python list
print(type_of_target(data.pet))              # pandas Series
print(type_of_target(np.asarray(data.pet)))  # NumPy array built from the Series
print(np.asarray(data.pet).dtype)            # dtype of that array
## A plain list has no ``dtype`` attribute: the original expression
## ``list(data.pet).dtype`` raised the AttributeError recorded in the output
## below. Convert the list to an array first to see the dtype NumPy infers
## for the list of labels.
np.asarray(list(data.pet)).dtype
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-31-8d4f067d1e39> in <module>() 5 6 print np.asarray(data.pet).dtype ----> 7 list(data.pet).dtype AttributeError: 'list' object has no attribute 'dtype'
multiclass unknown unknown object
## Describe how each DataFrame column should be transformed.
## DataFrameMapper is given a list of (columns, transformer) pairs:
##   * columns     -- a column name from the pandas DataFrame
##                    (or a list of multiple columns)
##   * transformer -- the object whose transformation is applied to that column
column_steps = [
    ('pet', preprocessing.LabelBinarizer()),
    ('children', preprocessing.StandardScaler()),
]
mapper = sklearn_pandas.DataFrameMapper(column_steps)
## We can use the mapper's fit_transform shortcut to both fit the model
## and see what the transformed data looks like.
## NOTE: in the run recorded in this notebook this call raises
## "ValueError: Unknown label type" while fitting a column transformer
## (apparently the LabelBinarizer on 'pet'; see the traceback that follows).
mapper.fit_transform(data)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-14-bd0a8637dc9d> in <module>() 1 ## we can use the mapper's fit_transform shrotcut to both fit the model 2 ## and see what transformed data looks like. ----> 3 mapper.fit_transform(data) /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/base.pyc in fit_transform(self, X, y, **fit_params) 406 if y is None: 407 # fit method of arity 1 (unsupervised transformation) --> 408 return self.fit(X, **fit_params).transform(X) 409 else: 410 # fit method of arity 2 (supervised transformation) /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn_pandas/__init__.pyc in fit(self, X, y) 44 transformer.fit(X[columns], y) 45 except TypeError: ---> 46 transformer.fit(X[columns]) 47 return self 48 /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/preprocessing/label.pyc in fit(self, y) 239 self.indicator_matrix_ = y_type == 'multilabel-indicator' 240 --> 241 self.classes_ = unique_labels(y) 242 243 return self /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/sklearn/utils/multiclass.pyc in unique_labels(*ys) 96 _unique_labels = _FN_UNIQUE_LABELS.get(label_type, None) 97 if not _unique_labels: ---> 98 raise ValueError("Unknown label type") 99 100 ys_labels = set(chain.from_iterable(_unique_labels(y) for y in ys)) ValueError: Unknown label type