import pandas as pd
import numpy as np
# Toy single-column frame: five ages, one of them missing (NaN at row 3),
# used to demonstrate imputation. The NaN makes the column float64.
X = pd.DataFrame([20, 30, 10, np.nan, 10], columns=['Age'])
X
| | Age |
|---|---|
| 0 | 20.0 |
| 1 | 30.0 |
| 2 | 10.0 |
| 3 | NaN |
| 4 | 10.0 |
from sklearn.impute import SimpleImputer
# Fill each NaN with the mean of the observed values in its column.
# strategy='mean' is SimpleImputer's default; it is spelled out here for clarity.
imputer = SimpleImputer(strategy='mean')
imputer.fit_transform(X)
array([[20. ], [30. ], [10. ], [17.5], [10. ]])
# Mean-impute again, and also append a missing-indicator column to the result:
# 1 where the original value was missing, 0 otherwise.
# (The add_indicator option was introduced in scikit-learn 0.21.)
imputer = SimpleImputer(strategy='mean', add_indicator=True)
imputer.fit_transform(X)
array([[20. , 0. ], [30. , 0. ], [10. , 0. ], [17.5, 1. ], [10. , 0. ]])
© 2020 Data School. All rights reserved.