In [1]:

from biased_stop_words import get_stop_words
from sklearn.feature_extraction.text import CountVectorizer

In [2]:

# Toy corpus: three short sentences that differ in pronoun, occupation
# and planet.
corpus = [
    "He is an astronaut, he is on Venus",
    "He is an accountant, he is on Earth",
    "She is an astronaut, she is on Mars",
]

# Stop-word list for the 'gendered' category, as provided by the
# biased_stop_words package; handed to the vectorizer in a later cell.
biased_stop_words = get_stop_words("gendered")

In [4]:

# Build a bag-of-words count matrix for the corpus, excluding every token
# that appears in the gendered stop-word list.
vectorizer = CountVectorizer(stop_words=biased_stop_words)
X = vectorizer.fit_transform(corpus)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# get_feature_names_out() is the replacement. It returns a NumPy array, so
# convert it to a plain list of str to keep the printed form
# (['accountant', 'an', ...]) unchanged.
print(vectorizer.get_feature_names_out().tolist())

# One row per document, one column per vocabulary term (same order as the
# list printed above); toarray() converts the result for display.
print(X.toarray())

['accountant', 'an', 'astronaut', 'earth', 'is', 'mars', 'on', 'venus']
[[0 1 1 0 2 0 1 1]
 [1 1 0 1 2 0 1 0]
 [0 1 1 0 2 1 1 0]]