import timeit

import numpy as np
import sklearn.datasets
from sklearn.ensemble import RandomForestClassifier as SklearnRF

from cudatree import RandomForestClassifier as CudaRF

# Benchmark script: fit a scikit-learn random forest and a CudaTree random
# forest (21 trees each) on the first half of the covertype dataset, then
# report each model's accuracy on the second half.
#
# This was originally an IPython session; the bare ``time <stmt>`` automagic
# lines have been replaced with explicit wall-clock timing so the file runs
# under a plain Python interpreter.  Single-argument print() calls with
# %-formatting behave the same on Python 2 and Python 3.


def _timed(label, func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, print the elapsed wall time, return the result."""
    start = timeit.default_timer()
    result = func(*args, **kwargs)
    print("%s: %.3f s" % (label, timeit.default_timer() - start))
    return result


d = sklearn.datasets.fetch_covtype()
x = d['data']
y = d['target']

print("%s %s" % (x.shape, y.shape))

# Floor division keeps ``n`` an integer on Python 3 as well, so it remains a
# valid slice index.
n = x.shape[0] // 2
xtrain = x[:n]
ytrain = y[:n]
xtest = x[n:]
ytest = y[n:]

skrf = SklearnRF(n_estimators=21, n_jobs=4)
cudarf = CudaRF()

_timed("sklearn fit", skrf.fit, xtrain, ytrain)
_timed("cudatree fit", cudarf.fit, xtrain, ytrain, n_trees=21, bootstrap=False)

# Predict once per model and keep the result: the accuracy lines reuse it,
# and the arrays stay available for inspection (the original session ended
# with bare predict() calls whose output only a notebook would display).
sk_pred = _timed("sklearn predict", skrf.predict, xtest)
cuda_pred = _timed("cudatree predict", cudarf.predict, xtest)

print("sklearn accuracy %s" % np.mean(sk_pred == ytest))
print("cudatree accuracy %s" % np.mean(cuda_pred == ytest))