import pandas as pd
import numpy as np
from sklearn.compose import make_column_transformer
# Toy dataset: one numeric column and two string-coded columns
# (the last 'Code' entry is deliberately missing).
X = pd.DataFrame(
    data={
        'Fare': [200, 300, 50, 900],
        'Code': ['X12', 'Y20', 'Z7', np.nan],
        'Deck': ['A101', 'C102', 'A200', 'C300'],
    },
)
from sklearn.preprocessing import FunctionTransformer
# Wrap np.clip as a transformer that bounds values to the range [100, 600].
clip_values = FunctionTransformer(
    func=np.clip,
    kw_args=dict(a_min=100, a_max=600),
)
# extract the first letter from each string
def first_letter(df: pd.DataFrame) -> pd.DataFrame:
    """Return a DataFrame holding the first character of each string in ``df``.

    Every column is processed through the pandas ``.str`` accessor, so each
    column must contain string data. Missing values (NaN) stay missing.

    Args:
        df: DataFrame whose columns all hold strings (NaN allowed).

    Returns:
        A new DataFrame with the same index and columns as ``df``.

    Raises:
        AttributeError: If a column does not hold string values.
    """
    # Slice column by column: ``.str`` is available on Series, not DataFrame.
    return df.apply(lambda col: col.str.slice(0, 1))
# Expose first_letter as a transformer so it can be used in a ColumnTransformer.
get_first_letter = FunctionTransformer(func=first_letter)
# Route each column to its transformer: clip 'Fare', and keep only the
# first letter of 'Code' and 'Deck'.
ct = make_column_transformer(
    (clip_values, ['Fare']),
    (get_first_letter, ['Code', 'Deck']),
)
# Display the input DataFrame as it is before the column transformer is applied.
X
| | Fare | Code | Deck |
---|---|---|---|
0 | 200 | X12 | A101 |
1 | 300 | Y20 | C102 |
2 | 50 | Z7 | A200 |
3 | 900 | NaN | C300 |
# Fit the column transformer on X and return the transformed values:
# 'Fare' goes through clip_values, 'Code' and 'Deck' through get_first_letter.
ct.fit_transform(X)
array([[200, 'X', 'A'], [300, 'Y', 'C'], [100, 'Z', 'A'], [600, nan, 'C']], dtype=object)
© 2020 Data School. All rights reserved.