Created
November 27, 2019 06:38
-
-
Save wolframalpha/ac8cb03e69efbb78d72fd878e994f856 to your computer and use it in GitHub Desktop.
Variable transformation and importance using OLS/regression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import statsmodels.api as sm | |
# Candidate element-wise transformations, keyed by the label that is
# reported in the ``transformation`` column of the results.
transformations = {
    'log': np.log,
    'sqrt': np.sqrt,
    'sqr': lambda x: np.power(x, 2),
    'cube': lambda x: np.power(x, 3),
    # Cube root. np.cbrt is used instead of a fractional power because it
    # is also defined for negative inputs.
    'cubert': np.cbrt,
    'original': lambda x: x,
}
def run_transformation(df, columns_to_tranform, transformations, y_col):
    """Fit one single-predictor OLS model per (column, transformation) pair.

    For every column in ``columns_to_tranform`` and every entry of
    ``transformations``, ``df[y_col]`` is regressed on the transformed
    column (after ``sm.add_constant``) and the coefficient table of the
    fitted model's summary is collected.

    Args:
        df: DataFrame holding the predictor columns and the target column.
        columns_to_tranform: Iterable of column names in ``df``; each one is
            used as the sole predictor of its own models.
        transformations: Mapping of transformation name -> callable that is
            applied element-wise to the predictor values.
        y_col: Name of the target column; its values are cast to float.

    Returns:
        A DataFrame concatenating the coefficient-table rows of every
        fitted model, with three extra columns: ``r2``, ``transformation``
        and ``variable``. Cells taken from the summary tables are the
        values as rendered by the statsmodels summary, not re-parsed
        numbers. An empty DataFrame is returned when there is nothing to
        fit (no columns or no transformations).
    """
    y = df[y_col].values.astype(float)
    all_df = []
    for col in columns_to_tranform:
        for trans_name, trans in transformations.items():
            # Transformations such as log can produce inf/NaN; those are
            # replaced by a small constant so every row stays in the fit.
            X = (
                df[col]
                .apply(trans)
                .replace([np.inf, -np.inf, np.nan], 1e-6)
                .values.reshape(-1, 1)
            )
            X = sm.add_constant(X)
            lm1 = sm.OLS(y, X).fit()
            s = lm1.summary()

            # tables[1] is the coefficient table: first row is the header,
            # the remaining rows are one per model term.
            coef_table = s.tables[1].data
            df_results = pd.DataFrame(coef_table[1:], columns=coef_table[0])
            # Last cell of the first row of the summary header table
            # (the R-squared entry).
            df_results['r2'] = s.tables[0].data[0][-1]
            df_results['transformation'] = trans_name
            df_results['variable'] = col
            all_df.append(df_results)

    # pd.concat raises on an empty list; return an empty frame instead.
    if not all_df:
        return pd.DataFrame()
    return pd.concat(all_df, axis=0, ignore_index=True)
# Evaluate every transformation of every candidate column against the
# 'VGO T 95' target column.
# NOTE(review): df_merged and columns_to_tranform are not defined in the
# visible part of this file - they must exist before this line runs.
results = run_transformation(df_merged, columns_to_tranform, transformations, 'VGO T 95')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment