Skip to content

Instantly share code, notes, and snippets.

@brpy
Last active January 25, 2021 12:36
Show Gist options
  • Save brpy/2939479e81b79c601525940a4afe65e4 to your computer and use it in GitHub Desktop.
Save brpy/2939479e81b79c601525940a4afe65e4 to your computer and use it in GitHub Desktop.
Response coding of categorical features for binary and multi-class classification tasks, in Python.
import numpy as np
import pandas as pd
# Label column that the per-category class frequencies are computed against.
target = "project_is_approved"

# Categorical columns to encode. The position of a name in this tuple decides
# which slice of output columns get_response_coding writes for that feature.
categorical_features = (
    "school_state", "teacher_prefix", "project_grade_category",
    "clean_categories", "clean_subcategories",
)
# Fit the response-coding table on the training frame:
#   response_coding[feature][sub_category] -> tuple with one relative
#   frequency per entry of `classes`, in the same order as `classes`.
response_coding = {}
classes = sorted([0, 1])
for category in categorical_features:
    cat = {}
    feature_values = X_train[category]
    # A missing value never compares equal to itself, so it would select no
    # rows and lead to a division by zero below; keep it out of the table.
    for sub_category in feature_values.dropna().unique():
        # Series.value_counts replaces the deprecated top-level pd.value_counts.
        count_dict = (
            X_train.loc[feature_values == sub_category, target]
            .value_counts()
            .to_dict()
        )
        count = [count_dict.get(cls, 0) for cls in classes]
        total = sum(count)
        if total == 0:
            # None of this sub-category's rows carries a label listed in
            # `classes`; skip it so lookups fall back to their default.
            continue
        cat[sub_category] = tuple(ele / total for ele in count)
    response_coding[category] = cat
def get_response_coding(
    df: pd.DataFrame,
    *,
    coding=None,
    features=None,
    class_labels=None,
) -> np.ndarray:
    """Encode the categorical columns of *df* as per-class probabilities.

    Each feature occupies ``n_classes`` consecutive output columns, in the
    order the features are listed. A row whose value has an entry in the
    coding table receives that entry; every other row (a value missing from
    the table, or a missing value) receives the uniform ``1 / n_classes``.

    Args:
        df: Frame containing every column named in ``features``.
        coding: Mapping ``feature -> {sub_category -> probabilities}``.
            Defaults to the module-level ``response_coding``.
        features: Ordered column names to encode. Defaults to the
            module-level ``categorical_features``.
        class_labels: Class labels; only their count is used here. Defaults
            to the module-level ``classes``.

    Returns:
        Float array of shape ``(len(df), len(features) * n_classes)``.
    """
    # Fall back to the tables built at module level when not overridden.
    if coding is None:
        coding = response_coding
    if features is None:
        features = categorical_features
    if class_labels is None:
        class_labels = classes

    n_classes = len(class_labels)
    default = [1 / n_classes] * n_classes
    response = np.zeros((len(df), len(features) * n_classes), dtype=float)

    for position, category in enumerate(features):
        start = position * n_classes
        stop = start + n_classes
        # Begin with the uniform fallback so rows whose value is absent from
        # the table do not stay at zero.
        response[:, start:stop] = default

        lookup = coding[category]
        column = df[category]
        # Walk the values present in *df* itself, not those of the training
        # frame, so the function works on any split without a global frame.
        for sub_category in pd.unique(column):
            probs = lookup.get(sub_category)
            if probs is None:
                continue
            rows = (column == sub_category).to_numpy()
            response[rows, start:stop] = probs
    return response
# Encode the training split, then the test split, with the same function.
train_response = get_response_coding(df=X_train)
test_response = get_response_coding(df=X_test)
@brpy
Copy link
Author

brpy commented Jan 25, 2021

Change the `categorical_features` and `target` variables (and `classes`, if your target is not binary 0/1) to fit your dataset.

The train and test DataFrames are expected to be named `X_train` and `X_test`, respectively.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment