@nagadomi
Forked from yagays/iris_xgboost.py
Last active September 22, 2016 01:29
import numpy as np
import scipy as sp
import xgboost as xgb
from hyperopt import hp, fmin, tpe
from sklearn import datasets
from sklearn import cross_validation  # deprecated; removed in scikit-learn 0.20 (see the sketch at the end)
from sklearn.metrics import confusion_matrix
np.random.seed(71)
iris = datasets.load_iris()
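# 50/50 split: even-indexed samples for training, odd-indexed for testing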
trainX = iris.data[0::2,:]
trainY = iris.target[0::2]
testX = iris.data[1::2,:]
testY = iris.target[1::2]
K = 10
kfold = cross_validation.KFold(n=len(trainX), n_folds=K,
                               shuffle=True, random_state=0)
params = []

def f(param):
    # hp.quniform samples floats; newer xgboost requires an integer max_depth
    param["max_depth"] = int(param["max_depth"])
    score = 0
    iteration = 0
    for train_index, test_index in kfold:
        xgb_model = xgb.XGBClassifier(**param)
        xgb_model.fit(trainX[train_index], trainY[train_index],
                      eval_set=[(trainX[test_index], trainY[test_index])],
                      early_stopping_rounds=30,
                      eval_metric='mlogloss',
                      verbose=False)
        score += xgb_model.best_score
        iteration += xgb_model.best_iteration
    param["score"] = score / K  # mean validation mlogloss over the K folds
    param["n_estimators"] = int(iteration / K)  # mean early-stopped round count
    params.append(param)
    return param["score"]
param_space = {
    'nthread': 4,
    'silent': 1,
    'n_estimators': 1000,  # upper bound; early stopping picks the effective value
    'objective': 'multi:softprob',
    'max_depth': hp.quniform('max_depth', 1, 10, 1),
    'learning_rate': hp.quniform('eta', 0.001, 0.1, 0.001),
    'subsample': hp.quniform('subsample', 0.1, 1.0, 0.01),
    'colsample_bytree': hp.quniform('colsample_bytree', 0.25, 1.0, 0.25),  # iris has only 4 features; 1/4 = 0.25
    'gamma': hp.quniform('gamma', 0.1, 20.0, 0.1),
    'min_child_weight': hp.quniform('min_child_weight', 1.0, 20.0, 0.1)
}
fmin(f, param_space, algo=tpe.suggest, max_evals=200)
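# Note: fmin() itself returns the best assignment keyed by the hp labels above
# (e.g. 'eta' rather than 'learning_rate'), and it knows nothing about the
# derived n_estimators. That is why f() appends every full parameter dict to
# `params`; the best one is recovered by sorting on the stored CV score.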
params = sorted(params, key=lambda param: param['score'])  # mlogloss: lower is better
best = params[0]
print("**best param")
print(best)
del best["score"]  # bookkeeping field, not an XGBoost parameter
xgb_model = xgb.XGBClassifier(**best)
xgb_model.fit(trainX, trainY)
predict = xgb_model.predict(testX)
print(confusion_matrix(testY, predict))
# **best param
# {'colsample_bytree': 0.5, 'silent': 1, 'learning_rate': 0.063, 'nthread': 4, 'min_child_weight': 1.0, 'n_estimators': 168, 'subsample': 0.78, 'score': 0.1174425, 'objective': 'multi:softprob', 'max_depth': 3.0, 'gamma': 0.1}
# [[25 0 0]
# [ 0 23 2]
# [ 0 0 25]]
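A note on API drift: the sklearn.cross_validation module used above was deprecated in scikit-learn 0.18 and removed in 0.20. A minimal sketch of the same fold construction with the modern sklearn.model_selection API (only the KFold setup and iteration change; the rest of the script stays as written):

from sklearn.model_selection import KFold

kfold = KFold(n_splits=K, shuffle=True, random_state=0)

# model_selection.KFold is not iterable itself; call .split() on the data
for train_index, test_index in kfold.split(trainX):
    ...  # same per-fold fitting as in f() above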