"""Walk through several ways of encoding categorical columns of a toy table.

The script builds a three-row DataFrame with a nominal column ('color'),
an ordinal column ('size'), a numeric column ('prize') and a class label,
and then encodes them with plain dict mappings, ``LabelEncoder``,
``DictVectorizer``, ``OneHotEncoder`` and ``pandas.get_dummies``.

Bare expressions such as ``df`` are kept from the notebook this script was
exported from; they render a result when run cell by cell and are no-ops
when the file is executed as a plain script.
"""

import numpy as np
import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# The original line magics (%load_ext / %watermark) are a SyntaxError in a
# plain Python file, so they are issued through the IPython API and skipped
# when no IPython shell is available.
try:
    _ipython = get_ipython()  # noqa: F821 - only defined under IPython
except NameError:
    _ipython = None
if _ipython is not None:
    _ipython.run_line_magic('load_ext', 'watermark')
    _ipython.run_line_magic(
        'watermark',
        "-a 'Sebastian Raschka' -v -p pandas,numpy,scikit-learn -d",
    )

# Example data shared by both halves of the walkthrough.
EXAMPLE_ROWS = [
    ['green', 'M', 10.1, 'class1'],
    ['red', 'L', 13.5, 'class2'],
    ['blue', 'XL', 15.3, 'class1'],
]
EXAMPLE_COLUMNS = ['color', 'size', 'prize', 'class label']

# Ordinal feature: the integers preserve the order M < L < XL.
size_mapping = {
    'XL': 3,
    'L': 2,
    'M': 1,
}

# Nominal feature: one indicator position per color.
color_mapping = {
    'green': (0, 0, 1),
    'red': (0, 1, 0),
    'blue': (1, 0, 0),
}

# ---------------------------------------------------------------------------
# Part 1: manual encoding with dictionaries
# ---------------------------------------------------------------------------

df = pd.DataFrame(EXAMPLE_ROWS)
df.columns = EXAMPLE_COLUMNS
df

# Sorting the unique labels makes the label -> integer assignment
# reproducible; iterating a set of strings can yield a different order
# (and therefore different codes) from one interpreter run to the next.
class_mapping = {
    label: idx
    for idx, label in enumerate(sorted(df['class label'].unique()))
}
df['class label'] = df['class label'].map(class_mapping)
df

df['size'] = df['size'].map(size_mapping)
df

df['color'] = df['color'].map(color_mapping)
df

y = df['class label'].values
X = df.iloc[:, :-1].values
# Each row starts with the color tuple; splice its entries in front of the
# remaining feature values so every row becomes a flat numeric vector.
X = np.array([list(row[0]) + list(row[1:]) for row in X])

print('Class labels:', y)
print('\nFeatures:\n', X)

# Invert every mapping to restore the original string values.
inv_color_mapping = {v: k for k, v in color_mapping.items()}
inv_size_mapping = {v: k for k, v in size_mapping.items()}
inv_class_mapping = {v: k for k, v in class_mapping.items()}

df['color'] = df['color'].map(inv_color_mapping)
df['size'] = df['size'].map(inv_size_mapping)
df['class label'] = df['class label'].map(inv_class_mapping)
df

# ---------------------------------------------------------------------------
# Part 2: scikit-learn encoders
# ---------------------------------------------------------------------------

class_le = LabelEncoder()
df['class label'] = class_le.fit_transform(df['class label'])

df['size'] = df['size'].map(size_mapping)
df

class_le.inverse_transform(df['class label'])

# One {column name: value} dict per row, the input format DictVectorizer
# consumes.
records = df.to_dict(orient='records')
records

dvec = DictVectorizer(sparse=False)
X = dvec.fit_transform(records)
X

# `feature_names_` is used instead of `get_feature_names()`, which newer
# scikit-learn releases no longer provide.
pd.DataFrame(X, columns=dvec.feature_names_)

color_le = LabelEncoder()
df['color'] = color_le.fit_transform(df['color'])
df

# The encoder's default sparse output is densified explicitly; this avoids
# the `sparse=` keyword, which newer scikit-learn releases no longer accept.
ohe = OneHotEncoder()
X = ohe.fit_transform(df[['color']].values).toarray()
X

# ---------------------------------------------------------------------------
# Part 3: pandas.get_dummies on a fresh copy of the example data
# ---------------------------------------------------------------------------

df = pd.DataFrame(EXAMPLE_ROWS)
df.columns = EXAMPLE_COLUMNS

df['size'] = df['size'].map(size_mapping)

class_mapping = {
    label: idx
    for idx, label in enumerate(sorted(df['class label'].unique()))
}
df['class label'] = df['class label'].map(class_mapping)
df

pd.get_dummies(df)