import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.compose import make_column_transformer
# Imputer created with scikit-learn's default settings (mean strategy per the
# SimpleImputer docs); it is applied to column 'A' in the transformers below.
impute = SimpleImputer()
# Toy dataset: column 'A' contains a single missing value, while
# columns 'B' through 'E' are fully populated integers.
X = pd.DataFrame(
    {
        'A': [1, 2, np.nan],
        'B': [10, 20, 30],
        'C': [100, 200, 300],
        'D': [1000, 2000, 3000],
        'E': [10000, 20000, 30000],
    }
)
# Bare expression: displays the frame when run as a notebook cell.
X
|   | A | B | C | D | E |
|---|---|---|---|---|---|
| 0 | 1.0 | 10 | 100 | 1000 | 10000 |
| 1 | 2.0 | 20 | 200 | 2000 | 20000 |
| 2 | NaN | 30 | 300 | 3000 | 30000 |
# Approach 1: name the columns to KEEP.
# Column 'A' is imputed, 'B' and 'C' are passed through unchanged, and every
# column not listed ('D' and 'E') is discarded via remainder='drop'.
ct = make_column_transformer(
    (impute, ['A']),
    ('passthrough', ['B', 'C']),
    remainder='drop',
)
# Bare expression: displays the transformed array when run as a notebook cell.
ct.fit_transform(X)
array([[ 1. , 10. , 100. ], [ 2. , 20. , 200. ], [ 1.5, 30. , 300. ]])
# Approach 2: name the columns to DISCARD.
# Column 'A' is imputed, 'D' and 'E' are dropped explicitly, and every column
# not listed ('B' and 'C') is kept as-is via remainder='passthrough'.
ct = make_column_transformer(
    (impute, ['A']),
    ('drop', ['D', 'E']),
    remainder='passthrough',
)
# Bare expression: displays the transformed array when run as a notebook cell.
ct.fit_transform(X)
array([[ 1. , 10. , 100. ], [ 2. , 20. , 200. ], [ 1.5, 30. , 300. ]])
© 2020 Data School. All rights reserved.