Feature importance in XGBoost
# credits @mmueller https://www.kaggle.com/mmueller/liberty-mutual-group-property-inspection-prediction/xgb-feature-importance-python/code
import pandas as pd
import xgboost as xgb
import operator
import matplotlib.pyplot as plt
def create_feature_map(features):
    # Write the feature map file XGBoost expects: one line per feature,
    # "<index>\t<name>\t<type>", where 'q' marks a quantitative feature.
    with open('xgb.fmap', 'w') as outfile:
        for i, feat in enumerate(features):
            outfile.write('{0}\t{1}\tq\n'.format(i, feat))

features, x_train, y_train = get_data()
create_feature_map(features)
# gbdt is an already-trained xgboost Booster; the fmap file written above
# maps its f0, f1, ... feature indices back to readable names.
importance = gbdt.get_fscore(fmap='xgb.fmap')
importance = sorted(importance.items(), key=operator.itemgetter(1))
df = pd.DataFrame(importance, columns=['feature', 'fscore'])
# Normalize so the scores sum to 1 and read as relative importance.
df['fscore'] = df['fscore'] / df['fscore'].sum()
# Plot the normalized scores as a horizontal bar chart.
df.plot(kind='barh', x='feature', y='fscore', legend=False, figsize=(6, 10))
plt.title('XGBoost Feature Importance')
plt.xlabel('relative importance')
plt.gcf().savefig('feature_importance_xgb.png')
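
The script above never defines get_data() or gbdt. Below is a minimal sketch of how they might be wired up, using synthetic data; the column names, parameters, and helper body are illustrative assumptions, not part of the original gist:

import numpy as np

def get_data():
    # Hypothetical stand-in for the undefined get_data(): returns feature
    # names plus training data. Swap in your own loading code.
    rng = np.random.default_rng(0)
    x_train = pd.DataFrame(rng.normal(size=(200, 3)),
                           columns=['var_a', 'var_b', 'var_c'])
    y_train = (x_train['var_a'] + 0.5 * rng.normal(size=200) > 0).astype(int)
    return list(x_train.columns), x_train, y_train

features, x_train, y_train = get_data()
create_feature_map(features)
# Train on the raw array so the booster keeps generic f0, f1, ... names
# and the fmap file supplies the readable ones at get_fscore time.
dtrain = xgb.DMatrix(x_train.values, label=y_train)
gbdt = xgb.train({'objective': 'binary:logistic', 'max_depth': 3},
                 dtrain, num_boost_round=20)

Note that get_fscore reports weight-based importance (how many times each feature is used in a split); Booster.get_score(fmap='xgb.fmap', importance_type='gain') is available if gain-based importance is preferred.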