Skip to content

Instantly share code, notes, and snippets.

@glouppe
Last active September 3, 2015 12:39
Show Gist options
  • Save glouppe/5f2fae95e80392e7a0e3 to your computer and use it in GitHub Desktop.
Save glouppe/5f2fae95e80392e7a0e3 to your computer and use it in GitHub Desktop.
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_mldata
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from rep.estimators import TMVAClassifier
from functools import partial
# Select the clock used to time each fit.  Both branches are wall-clock
# timers, so the reported training time means the same thing on every
# interpreter.  ``time.process_time`` is deliberately not used: it reports
# CPU time of the current process only, which is not comparable with the
# ``time.time`` fallback.
# NOTE(review): CPU time of this process would also miss any work an
# estimator delegates to a child process -- confirm whether the TMVA
# wrapper does so.
try:
    from time import perf_counter as bench_time  # Python 3.3+
except ImportError:
    from time import time as bench_time  # older interpreters
# Load MNIST and reduce it to a small binary problem: digit 3 versus digit 8.
data = fetch_mldata('MNIST original')
X = data.data
y = data.target

# Keep only the samples labelled 3 or 8 and work on a float copy.
mask = (y == 3) | (y == 8)
X = X[mask].astype(float)
X += np.random.normal(loc=0.0, scale=0.001, size=X.shape)  # TMVA crashes on constant features
print(X.dtype)  # call form is valid on both Python 2 and Python 3

# Relabel the two remaining classes as 0 and 1.
y = y[mask]
y[y == 3] = 0  # TMVA crashes on multi-class classification
y[y == 8] = 1

# Draw a random subset of at most 1000 samples, then split it into train and
# test parts with train_test_split's default proportions.
indices = np.random.permutation(len(X))[:1000]
X, y = X[indices], y[indices]
X_train, X_test, y_train, y_test = train_test_split(X, y)

# Hyper-parameters shared by the estimators benchmarked below.
n_estimators = 100
max_features = int(X.shape[1] ** 0.5)  # square root of the number of columns of X
max_depth = 1000000  # very large; presumably meant to leave tree depth unbounded
# Keyword arguments shared by the two scikit-learn forest estimators.
forest_params = dict(n_estimators=n_estimators,
                     max_features=max_features,
                     max_depth=max_depth)

# Options for the TMVA BDT trained with bagging and randomised trees.
tmva_bagging_params = dict(method="kBDT",
                           BoostType="Bagging",
                           NTrees=n_estimators,
                           UseNvars=max_features,
                           MaxDepth=max_depth,
                           MinNodeSize=0.,
                           UseBaggedBoost=True,
                           UseRandomisedTrees=True,
                           nCuts=-1,
                           nEventsMin=1)

# Options for the TMVA BDT trained with gradient boosting.
tmva_grad_params = dict(method="kBDT",
                        BoostType="Grad",
                        NTrees=n_estimators,
                        MaxDepth=3,
                        MinNodeSize=0.,
                        nCuts=-1,
                        nEventsMin=1)

# (label, zero-argument factory) pairs, benchmarked in this order.
candidates = [
    ("sklearn.RandomForestClassifier",
     partial(RandomForestClassifier, **forest_params)),
    ("sklearn.ExtraTreesClassifier",
     partial(ExtraTreesClassifier, **forest_params)),
    ("sklearn.GradientBoostingClassifier",
     partial(GradientBoostingClassifier,
             n_estimators=n_estimators,
             max_depth=3)),
    ("tmva.kBDT(BoostType=Bagging)",
     partial(TMVAClassifier, **tmva_bagging_params)),
    ("tmva.kBDT(BoostType=Grad)",
     partial(TMVAClassifier, **tmva_grad_params)),
]

# Fit every candidate on the training split, timing only the call to fit(),
# then report the elapsed time next to the score on the test split.
for name, EstimatorClass in candidates:
    est = EstimatorClass()

    time_start = bench_time()
    est.fit(X_train, y_train)
    chrono = bench_time() - time_start

    test_score = est.score(X_test, y_test)
    print("%s \t train time=%s \t score=%f" % (name, chrono, test_score))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment