Created
April 25, 2016 08:15
-
-
Save armgilles/09715f0b79880f8b42aab992fdb16b04 to your computer and use it in GitHub Desktop.
Select the best features for a logistic regression using a GridSearchCV-style grid search over C and solver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Manual grid search over the inverse regularisation strength ``C`` and the
# solver of a class-balanced logistic regression.  For every (C, solver)
# pair a sequential forward feature selection (SFS) is run with ROC AUC
# scoring, the best-scoring feature subset is re-scored with
# ``cross_val_score`` on the same folds, and the outcome is recorded.  The
# best row overall yields ``best_features`` and ``best_model``.
#
# Expects ``my_data`` (feature DataFrame) and ``y`` (target Series) to be
# defined earlier, along with the sklearn / mlxtend / pandas imports.
skf = StratifiedKFold(y, n_folds=5, random_state=17, shuffle=True)
C_params = [0.01, 1, 10, 50, 70, 100]
solvers = ['newton-cg', 'lbfgs', 'liblinear', 'sag']

my_result_list = []
for C_param in C_params:
    for solver in solvers:
        # Single-argument call form prints the same text on Python 2 and 3.
        print("Looking for C : %s and solver : %s" % (C_param, solver))
        model = LogisticRegression(class_weight='balanced', random_state=17,
                                   solver=solver, C=C_param)

        # k_features is set to the full column count so the selector is
        # allowed to grow the subset up to every available feature.
        sfs = SFS(model,
                  k_features=len(my_data.columns),
                  forward=True,
                  floating=False,
                  scoring='roc_auc',
                  print_progress=False,
                  cv=skf)
        sfs = sfs.fit(my_data.values, y.values)

        # One row per evaluated subset; put the highest mean CV score first.
        result_sfs = pd.DataFrame.from_dict(sfs.get_metric_dict()).T
        result_sfs.sort_values('avg_score', ascending=False, inplace=True)

        # ``feature_idx`` of the top row is a tuple of column positions.
        # Flatten it to a plain list of ints: indexing ``columns`` with a
        # list that *contains* a tuple only worked through NumPy's
        # deprecated "sequence as tuple index" behaviour and breaks on
        # current NumPy / pandas.
        features_sfs = list(result_sfs.feature_idx.iloc[0])
        select_features_sfs = list(my_data.columns[features_sfs])

        # Re-evaluate the chosen subset on the same folds to get mean / std.
        scores = cross_val_score(model, my_data[select_features_sfs], y,
                                 cv=skf, scoring='roc_auc')
        my_result_list.append({'C': C_param,
                               'solver': solver,
                               'auc': scores.mean(),
                               'std': scores.std(),
                               'best_columns': select_features_sfs,
                               # NOTE(review): stored exactly as constructed
                               # above; presumably still unfitted if SFS and
                               # cross_val_score clone it -- refit on
                               # ``best_features`` before predicting.
                               'estimator': model})

# Rank every (C, solver) combination by mean AUC and keep the winner.
my_result = pd.DataFrame(my_result_list)
my_result.sort_values('auc', ascending=False, inplace=True)
best_features = my_result.best_columns.head(1).values[0]
best_model = my_result.estimator.head(1).values[0]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment