@jphall663
Created August 6, 2018 14:19
Manual XGBoost grid search (Python)
import time

import xgboost as xgb

# exhaustive grid search over XGBoost hyperparameters;
# assumes train (pandas DataFrame), y (target column name),
# dtrain and dtest (xgb.DMatrix), and mono_constraints are
# already defined (see the setup sketch below)

iter_ = 0
best_score = 0  # eval_metric is AUC, so higher is better
best_iter = 0
best_model = None

col_sample_rates = [0.1, 0.5, 0.9]
subsamples = [0.1, 0.5, 0.9]
etas = [0.01, 0.001]
max_depths = [3, 6, 12, 15, 18]
reg_alphas = [0.01, 0.001]
reg_lambdas = [0.01, 0.001]
ntrees = [200, 400]

# 3 * 3 * 2 * 5 * 2 * 2 * 2 = 720 candidate models
total_models = (len(col_sample_rates) * len(subsamples) * len(etas) *
                len(max_depths) * len(reg_alphas) * len(reg_lambdas) *
                len(ntrees))

# mean y value in training data, used as the initial prediction
y_mean = train[y].mean()

for col_sample_rate in col_sample_rates:
    for subsample in subsamples:
        for eta in etas:
            for max_depth in max_depths:
                for reg_alpha in reg_alphas:
                    for reg_lambda in reg_lambdas:
                        for ntree in ntrees:

                            tic = time.time()
                            print('-------------------')
                            print('Training model %d of %d ...' % (iter_ + 1, total_models))
                            print('col_sample_rate =', col_sample_rate)
                            print('subsample =', subsample)
                            print('eta =', eta)
                            print('max_depth =', max_depth)
                            print('reg_alpha =', reg_alpha)
                            print('reg_lambda =', reg_lambda)
                            print('ntree =', ntree)

                            params = {
                                'base_score': y_mean,
                                'booster': 'gbtree',
                                'colsample_bytree': col_sample_rate,
                                'eta': eta,
                                'eval_metric': 'auc',
                                'max_depth': max_depth,
                                'nthread': 4,
                                'objective': 'binary:logistic',
                                'reg_alpha': reg_alpha,
                                'reg_lambda': reg_lambda,
                                'monotone_constraints': mono_constraints,
                                'seed': 12345,
                                'silent': 0,
                                'subsample': subsample}

                            watchlist = [(dtrain, 'train'), (dtest, 'eval')]
                            model = xgb.train(
                                params,
                                dtrain,
                                ntree,
                                early_stopping_rounds=100,
                                evals=watchlist,
                                verbose_eval=False)

                            print('Model %d trained in %.2f s.' % (iter_ + 1, time.time() - tic))
                            print('Model %d best score = %.4f' % (iter_ + 1, model.best_score))

                            # AUC is maximized, so keep the model with the highest best_score
                            if model.best_score > best_score:
                                best_score = model.best_score
                                best_iter = iter_
                                best_model = model
                                print('Best so far!')
                                print('Best AUC =', best_score)

                            iter_ += 1

print('Best model found at iteration: %d, with AUC: %.4f.' % (best_iter + 1, best_score))
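
The snippet relies on a few objects created earlier in the original notebook: a pandas DataFrame train, a target column name y, xgb.DMatrix objects dtrain and dtest, and mono_constraints. A minimal sketch of that setup follows; the file names, the 'outcome' target column, and the all-increasing constraints are hypothetical placeholders, not the gist author's actual data.

import pandas as pd
import xgboost as xgb

# hypothetical data and column names, for illustration only
train = pd.read_csv('train.csv')  # training split
test = pd.read_csv('test.csv')    # evaluation split
y = 'outcome'                     # binary target column
X = [c for c in train.columns if c != y]

# one constraint per feature: 1 = increasing, -1 = decreasing, 0 = none;
# the tuple form works in recent XGBoost versions, older versions
# expect a string like '(1,0,-1)'
mono_constraints = tuple(1 for _ in X)

dtrain = xgb.DMatrix(train[X], label=train[y])
dtest = xgb.DMatrix(test[X], label=test[y])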
@Jason2Brownlee
Great example of grid searching XGBoost hyperparameters.

We can also get great results using Bayesian optimization with XGBoost.
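
For reference, here is a minimal sketch of that idea using scikit-optimize's gp_minimize over a search space mirroring the grid above, reusing dtrain, dtest, y_mean, and mono_constraints from the gist. The space bounds and n_calls=50 are illustrative assumptions, not tuned values.

import xgboost as xgb
from skopt import gp_minimize
from skopt.space import Integer, Real

# continuous/integer search space covering the same ranges as the grid
space = [
    Real(0.1, 0.9, name='colsample_bytree'),
    Real(0.1, 0.9, name='subsample'),
    Real(0.001, 0.1, prior='log-uniform', name='eta'),
    Integer(3, 18, name='max_depth'),
    Real(0.001, 0.1, prior='log-uniform', name='reg_alpha'),
    Real(0.001, 0.1, prior='log-uniform', name='reg_lambda'),
]

def objective(values):
    colsample_bytree, subsample, eta, max_depth, reg_alpha, reg_lambda = values
    params = {
        'base_score': y_mean,
        'booster': 'gbtree',
        'colsample_bytree': colsample_bytree,
        'eta': eta,
        'eval_metric': 'auc',
        'max_depth': int(max_depth),  # skopt returns numpy integers
        'nthread': 4,
        'objective': 'binary:logistic',
        'reg_alpha': reg_alpha,
        'reg_lambda': reg_lambda,
        'monotone_constraints': mono_constraints,
        'seed': 12345,
        'subsample': subsample}
    model = xgb.train(params, dtrain, 400,
                      evals=[(dtrain, 'train'), (dtest, 'eval')],
                      early_stopping_rounds=100, verbose_eval=False)
    # gp_minimize minimizes, so return 1 - AUC
    return 1.0 - model.best_score

result = gp_minimize(objective, space, n_calls=50, random_state=12345)
print('Best 1 - AUC: %.4f' % result.fun)
print('Best hyperparameters:', result.x)

Each call fits one model, so 50 calls can search the space with far fewer fits than the 720-model grid above.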
