import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import make_column_transformer
# Load the Titanic training data. dropna() is called with no arguments and
# before the feature columns are selected, so a row is dropped if it has a
# missing value in ANY column of the CSV, not just the four used below.
df = pd.read_csv('http://bit.ly/kaggletrain').dropna()

# select 4 features
X = df[['Embarked', 'Sex', 'Parch', 'Fare']]

# one-hot encode "Embarked" and "Sex", and passthrough "Parch" and "Fare"
ct = make_column_transformer(
    (OneHotEncoder(), ['Embarked', 'Sex']),
    remainder='passthrough')

# ColumnTransformer outputs 7 columns
# (3 for "Embarked" + 2 for "Sex" + the 2 passthrough columns)
ct.fit_transform(X).shape
# Output: (183, 7)

# get the names of those 7 features
# ColumnTransformer.get_feature_names() was deprecated in scikit-learn 1.0 and
# removed in 1.2; get_feature_names_out() is its replacement. Prefer the new
# method and only fall back to the old one on releases that predate it. The
# `or` short-circuits, so the removed attribute is never touched on new
# releases.
get_names = getattr(ct, 'get_feature_names_out', None) or ct.get_feature_names
get_names()
# Expected output with get_feature_names_out() (scikit-learn >= 1.0):
#   array(['onehotencoder__Embarked_C', 'onehotencoder__Embarked_Q',
#          'onehotencoder__Embarked_S', 'onehotencoder__Sex_female',
#          'onehotencoder__Sex_male', 'remainder__Parch', 'remainder__Fare'],
#         dtype=object)
# Output with the legacy get_feature_names() (scikit-learn < 1.0):
#   ['onehotencoder__x0_C', 'onehotencoder__x0_Q', 'onehotencoder__x0_S',
#    'onehotencoder__x1_female', 'onehotencoder__x1_male', 'Parch', 'Fare']
# © 2020 Data School. All rights reserved.