In [1]:

# Newsgroup names to load; passed as the `categories` filter to the dataset loader.
categories = [
    'alt.atheism',
    'soc.religion.christian',
    'comp.graphics',
    'sci.med',
]

In [2]:

from sklearn.datasets import fetch_20newsgroups

In [3]:

# Load only the training split, restricted to the newsgroups listed in `categories`.
# The first call downloads the dataset archive, so it needs network access.
twenty_train = fetch_20newsgroups(
    subset='train',
    categories=categories,
    shuffle=True,      # shuffle the documents ...
    random_state=42,   # ... with a fixed seed so the order is repeatable
)

Downloading dataset from http://people.csail.mit.edu/jrennie/20Newsgroups/20news-bydate.tar.gz (14 MB)

In [4]:

# Label names of the loaded subset. The output below lists them alphabetically,
# not in the order they were given in `categories`.
twenty_train.target_names

Out[4]:

['alt.atheism', 'comp.graphics', 'sci.med', 'soc.religion.christian']

In [ ]: