from sklearn.ensemble import RandomForestClassifier as SklearnRF
from cudatree import RandomForestClassifier as CudaRF
/usr/lib/python2.7/dist-packages/nose/util.py:14: DeprecationWarning: The compiler package is deprecated and removed in Python 3.x. from compiler.consts import CO_GENERATOR
import sklearn.datasets
d = sklearn.datasets.fetch_covtype(); x = d['data']; y = d['target']
print x.shape, y.shape
(581012, 54) (581012,)
# Split the data into two contiguous halves: the first n rows are used for
# training and the remaining rows for testing.
# `//` keeps n an integer even under true division (Python 3 or
# `from __future__ import division`), where `/` would produce a float that
# cannot be used as a slice index.
# NOTE(review): the split is purely positional, with no shuffling -- confirm
# that the dataset's row order does not bias the two halves.
n = x.shape[0] // 2
xtrain, ytrain = x[:n], y[:n]
xtest, ytest = x[n:], y[n:]

# One forest per implementation. The scikit-learn forest gets its tree count
# (21) and worker count (4) here; the cudatree forest is built with defaults.
skrf = SklearnRF(n_estimators=21, n_jobs=4)
cudarf = CudaRF()
time skrf.fit(xtrain, ytrain)
CPU times: user 0.35 s, sys: 0.66 s, total: 1.01 s Wall time: 19.40 s
RandomForestClassifier(bootstrap=True, compute_importances=None, criterion='gini', max_depth=None, max_features='auto', min_density=None, min_samples_leaf=1, min_samples_split=2, n_estimators=21, n_jobs=4, oob_score=False, random_state=None, verbose=0)
time cudarf.fit(xtrain, ytrain, n_trees = 21, bootstrap=False)
CPU times: user 13.90 s, sys: 0.24 s, total: 14.14 s Wall time: 14.15 s
print "sklearn accuracy", np.mean(skrf.predict(xtest) == ytest)
sklearn accuracy 0.724614982135
print "cudatree accuracy", np.mean(cudarf.predict(xtest) == ytest)
cudatree accuracy 0.633735619918
# Display the cudatree predictions for the test half; left as a bare
# expression so the interactive shell echoes the resulting array.
cudarf.predict(xtest)
array([2, 2, 1, ..., 3, 3, 3], dtype=int32)
# Display the scikit-learn predictions for the test half; left as a bare
# expression so the interactive shell echoes the resulting array.
skrf.predict(xtest)
array([1, 1, 1, ..., 3, 3, 3], dtype=int32)