Last active
August 18, 2020 09:26
-
-
Save shaan-shah/9a32b40adbad28d4f8be037c591150f9 to your computer and use it in GitHub Desktop.
This was made to demonstrate code on Medium.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def data_trainer(Target_Variable, data_raw, n_valid, date_column=None):
    """Pre-process ``data_raw`` and grid-search two model settings.

    The block runs the following steps in order:

    1. Calls ``reset_rf_samples()``, optionally ``add_datepart`` on
       ``date_column``, then ``train_cats`` and ``proc_df``.
    2. Splits the raw frame, the processed frame and the target with
       ``split_vals`` at index ``len(df) - n_valid``.
    3. Calls ``set_rf_samples(20000)`` when the training split has more
       than 20000 rows.
    4. For every combination of the candidate leaf sizes and feature
       settings, trains a model with ``auto_train`` and scores it with
       ``fx`` on the validation split, remembering the best one.

    Args:
        Target_Variable: Forwarded to ``proc_df`` as its second argument
            (presumably the name of the target column -- confirm against
            ``proc_df``).
        data_raw: The raw data set. It is NOT copied; any in-place changes
            made by ``add_datepart`` / ``train_cats`` will be visible to
            the caller.
        n_valid: Number of rows subtracted from ``len(df)`` to obtain the
            split index passed to ``split_vals``.
        date_column: When truthy, forwarded to ``add_datepart``.

    Note:
        No ``return`` statement is visible in this block. The best score,
        leaf size, feature setting and model are held in the locals
        ``score``, ``min_leaf_a``, ``max_feature_a`` and ``z``.
    """
    # Alias, not a copy: the helpers below receive the caller's own object.
    df_raw = data_raw
    reset_rf_samples()

    # Only process the date column when the caller named one.
    if date_column:
        add_datepart(df_raw, date_column)
    train_cats(df_raw)
    df, y, nas = proc_df(df_raw, Target_Variable)

    # Everything from index n_trn onwards is handed to split_vals as the
    # validation part (presumably the last n_valid rows -- confirm against
    # split_vals).
    n_trn = len(df) - n_valid
    raw_train, raw_valid = split_vals(df_raw, n_trn)
    X_train, X_valid = split_vals(df, n_trn)
    y_train, y_valid = split_vals(y, n_trn)

    # "decider" is the rf sample size used to speed up the process; it is
    # only set for training splits larger than 20000 rows.
    decider = None
    if len(X_train) > 20000:
        decider = 20000
    if decider:
        set_rf_samples(decider)

    # From here on we are tuning the parameters.
    # NOTE(review): the best score starts at 0, so if fx never returns a
    # value above 0 no candidate is selected and z stays None -- confirm
    # the range of fx.
    score = 0
    min_leaf_a = 0
    max_feature_a = None
    z = None
    list1 = [1, 3, 5, 10, 25]
    list2 = [0.1, 0.25, 0.5, 0.75, 0.9, "sqrt", "log2", 1]
    for leafs in list1:
        for features in list2:
            t = auto_train(a=leafs, b=features, X_train=X_train, y_train=y_train)
            # Score each candidate exactly once and reuse the value for the
            # log line, the comparison and the bookkeeping.
            candidate_score = fx(t, X_valid=X_valid, y_valid=y_valid)
            print(candidate_score, leafs)
            if candidate_score > score:
                score = candidate_score
                min_leaf_a = leafs
                max_feature_a = features
                z = t
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment