In [1]:
from sklearn.ensemble import RandomForestClassifier as SklearnRF
In [2]:
from cudatree import RandomForestClassifier as CudaRF
/usr/lib/python2.7/dist-packages/nose/util.py:14: DeprecationWarning: The compiler package is deprecated and removed in Python 3.x.
  from compiler.consts import CO_GENERATOR
In [3]:
import numpy as np
import sklearn.datasets
In [4]:
# Fetch the covertype data set and unpack the feature matrix and label vector.
d = sklearn.datasets.fetch_covtype()
x = d['data']
y = d['target']
In [5]:
# Sanity-check the dimensions of the feature matrix and the label vector.
# One pre-formatted string prints the same text under the Python 2 print
# statement and the Python 3 print function.
print("%s %s" % (x.shape, y.shape))
(581012, 54) (581012,)
In [6]:
# Use the first half of the rows for training and the second half for testing.
# `//` keeps n an integer on both Python 2 and Python 3; a plain `/` yields a
# float on Python 3, which is not a valid slice index.
# NOTE(review): the rows are split in stored order with no shuffling -- confirm
# that the data set order is not correlated with the label.
n = x.shape[0] // 2
xtrain, ytrain = x[:n], y[:n]
xtest, ytest = x[n:], y[n:]
In [19]:
# CPU baseline: scikit-learn random forest with 21 trees, built with n_jobs=4.
skrf = SklearnRF(
    n_jobs=4,
    n_estimators=21,
)
In [20]:
# cudatree forest built with default constructor arguments; the number of
# trees is passed later, in the call to fit().
cudarf = CudaRF()
In [21]:
time skrf.fit(xtrain, ytrain)
CPU times: user 0.35 s, sys: 0.66 s, total: 1.01 s
Wall time: 19.40 s
Out[21]:
RandomForestClassifier(bootstrap=True, compute_importances=None,
            criterion='gini', max_depth=None, max_features='auto',
            min_density=None, min_samples_leaf=1, min_samples_split=2,
            n_estimators=21, n_jobs=4, oob_score=False, random_state=None,
            verbose=0)
In [27]:
time cudarf.fit(xtrain, ytrain, n_trees = 21, bootstrap=False)
CPU times: user 13.90 s, sys: 0.24 s, total: 14.14 s
Wall time: 14.15 s
In [28]:
# Accuracy = fraction of test rows whose predicted label equals the true label.
# One pre-formatted string prints the same text under the Python 2 print
# statement and the Python 3 print function.
print("sklearn accuracy %s" % np.mean(skrf.predict(xtest) == ytest))
sklearn accuracy 0.724614982135
In [29]:
# Accuracy = fraction of test rows whose predicted label equals the true label.
# One pre-formatted string prints the same text under the Python 2 print
# statement and the Python 3 print function.
print("cudatree accuracy %s" % np.mean(cudarf.predict(xtest) == ytest))
cudatree accuracy 0.633735619918
In [25]:
# Display the cudatree predictions for the test rows (the array repr is
# truncated by numpy) for a visual comparison with the scikit-learn output.
cudarf.predict(xtest)
Out[25]:
array([2, 2, 1, ..., 3, 3, 3], dtype=int32)
In [26]:
# Display the scikit-learn predictions for the test rows (the array repr is
# truncated by numpy) for a visual comparison with the cudatree output.
skrf.predict(xtest)
Out[26]:
array([1, 1, 1, ..., 3, 3, 3], dtype=int32)