Skip to content

Instantly share code, notes, and snippets.

@aria42
Last active October 13, 2019 19:58
Show Gist options
  • Save aria42/96fa4d9a059e67c6d012c7b15868df85 to your computer and use it in GitHub Desktop.
Save aria42/96fa4d9a059e67c6d012c7b15868df85 to your computer and use it in GitHub Desktop.
conditional_features.py
import pandas as pd
def build_conditional_features(df, cond_series, feat_cols=None):
    """Cross every feature column with every distinct condition value.

    For each distinct value ``c`` in ``cond_series`` and each feature ``f``
    in ``feat_cols``, the result contains a column named ``f"{f}_||_{c}"``
    that equals ``df[f]`` on rows whose condition is ``c`` and 0 elsewhere.

    Rows are paired by position: row ``i`` of ``df`` goes with element ``i``
    of ``cond_series``, regardless of the index labels on either object.

    Args:
        df: DataFrame holding the feature columns.
        cond_series: pandas Series with one condition value per row of ``df``.
        feat_cols: Columns of ``df`` to expand. Defaults to all columns.

    Returns:
        A new DataFrame whose columns are grouped by condition value (in
        order of first appearance) and, within each group, ordered like
        ``feat_cols``. Produced columns carry the index of ``df``. If there
        are no condition values or no features, the frame is empty.

    Raises:
        ValueError: If ``cond_series`` and ``df`` have different lengths.
    """
    if len(cond_series) != len(df):
        raise ValueError("Condition series isn't same num rows as features")
    if feat_cols is None:
        feat_cols = list(df.columns)
    else:
        # Materialize once: the columns are iterated again for every
        # condition value, which would exhaust a one-shot iterable.
        feat_cols = list(feat_cols)

    cond_feat_to_series = {}
    for target_cond in cond_series.unique():
        # Use a plain positional 0/1 array. Multiplying two Series would
        # align on index labels and introduce NaN rows whenever the labels
        # of ``df`` and ``cond_series`` differ.
        cond_mask = (cond_series == target_cond).to_numpy().astype("int64")
        for feat in feat_cols:
            cond_feat = f"{feat}_||_{target_cond}"
            cond_feat_to_series[cond_feat] = df[feat] * cond_mask
    return pd.DataFrame(data=cond_feat_to_series)
# Toy inputs for the example: three raw records and a 3x3 binary feature matrix.
raw_df = pd.DataFrame({'name': ['foo', 'bar', 'roo'], 'age': [32, 20, 35]})
feats_df = pd.DataFrame(
    data=[[1, 1, 1], [0, 1, 1], [1, 0, 0]],
    columns=['f1', 'f2', 'f3'],
)
# >>> feats_df
#    f1  f2  f3
# 0   1   1   1
# 1   0   1   1
# 2   1   0   0
def age_condition(row):
    """Label a record as ``'adult'`` or ``'minor'`` based on ``row['age']``.

    An age strictly greater than 21 yields ``'adult'``; any other value,
    including exactly 21, yields ``'minor'``.
    """
    return 'adult' if row['age'] > 21 else 'minor'
# Derive one condition label per record, then cross each feature with that label.
cond_series = raw_df.apply(age_condition, axis="columns")
cond_feats = build_conditional_features(df=feats_df, cond_series=cond_series)
# >>> cond_feats
#    f1_||_adult  f2_||_adult  f3_||_adult  f1_||_minor  f2_||_minor  f3_||_minor
# 0            1            1            1            0            0            0
# 1            0            0            0            0            1            1
# 2            1            0            0            0            0            0
# Append the conditional columns to the right of the original features.
feats_df = pd.concat(
    [feats_df, cond_feats],
    axis="columns",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment