import logging
from sklearn.datasets import fetch_rcv1
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import f1_score, precision_score, recall_score
from sklearn.pipeline import Pipeline
from sklearn import svm
# Install the default root-logger handler so library log records are shown.
logging.basicConfig()

# Fetch the RCV1 dataset (downloaded on first use, then served from the
# local scikit-learn data cache).
rcv1 = fetch_rcv1()

# Rows before this index are used for training; every remaining row is
# held out for evaluation.
# NOTE(review): 23149 presumably matches the size of the official RCV1
# training subset -- confirm against the fetch_rcv1 documentation.
training_samples = 23149

X_train, y_train = rcv1.data[:training_samples], rcv1.target[:training_samples]
X_test, y_test = rcv1.data[training_samples:], rcv1.target[training_samples:]

# One linear SVM per label (one-vs-rest).
# NOTE(review): the scikit-learn LinearSVC docs state that with
# multi_class='crammer_singer' the `penalty`, `loss` and `dual` options are
# ignored -- confirm whether the L1 penalty requested here is really meant
# to be in effect.
clf = OneVsRestClassifier(
    svm.LinearSVC(
        penalty='l1',
        tol=0.01,
        multi_class='crammer_singer',
        dual=False,
    )
)
clf.fit(X_train, y_train)
# Output (warnings emitted during fit):
# /home/felipe/venv2/local/lib/python2.7/site-packages/sklearn/multiclass.py:70: UserWarning: Label not 49 is present in all training examples.
#   str(classes[c]))
# /home/felipe/venv2/local/lib/python2.7/site-packages/sklearn/multiclass.py:70: UserWarning: Label not 80 is present in all training examples.
#   str(classes[c]))
# Output (repr of the fitted estimator):
# OneVsRestClassifier(estimator=LinearSVC(C=1.0, class_weight=None, dual=False,
#     fit_intercept=True, intercept_scaling=1, loss='squared_hinge', max_iter=1000,
#     multi_class='crammer_singer', penalty='l1', random_state=None, tol=0.01,
#     verbose=0), n_jobs=1)
# Predict the label indicators for every held-out document.
y_pred = clf.predict(X_test)

# Micro-averaged F1: counts are pooled over all labels before the score is
# computed.
current_score = f1_score(y_true=y_test, y_pred=y_pred, average='micro')

# Bare expression: displays the value when run as a notebook/REPL cell.
current_score
# Output: 0.80843419139591599