#!/usr/bin/env python
# coding: utf-8

# [![Open in Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/justmarkham/scikit-learn-tips/master?filepath=notebooks%2F29_vectorize_two_columns.ipynb)
#
# [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/justmarkham/scikit-learn-tips/blob/master/notebooks/29_vectorize_two_columns.ipynb)
#
# # 🤖⚡ scikit-learn tip #29 ([video](https://www.youtube.com/watch?v=HyP5MvlmbRc&list=PL5-da3qGB5ID7YYAqireYEew2mWVvgmj6&index=29))
#
# Want to vectorize two text columns in a ColumnTransformer?
#
# You can't pass them in a list, but you can pass the vectorizer twice! (They'll learn separate vocabularies.)
#
# See example 👇

# In[1]:


# Load the example dataset from a remote CSV (requires network access).
import pandas as pd
df = pd.read_csv('http://bit.ly/kaggletrain')


# In[2]:


# Keep only the two text columns and drop the rows that contain missing values.
X = df[['Name', 'Cabin']].dropna()


# In[3]:


from sklearn.feature_extraction.text import CountVectorizer
vect = CountVectorizer()


# In[4]:


# The same vectorizer is listed once per text column, each column given as a
# plain string rather than a list (see the note at the top of this file).
from sklearn.compose import make_column_transformer
ct = make_column_transformer((vect, 'Name'), (vect, 'Cabin'))
# In a notebook this expression is displayed as the cell output; when run as a
# script the result is computed and discarded.
ct.fit_transform(X)


# ### Want more tips? [View all tips on GitHub](https://github.com/justmarkham/scikit-learn-tips) or [Sign up to receive 2 tips by email every week](https://scikit-learn.tips) 💌
#
# © 2020 [Data School](https://www.dataschool.io). All rights reserved.