Last active
March 12, 2022 00:30
-
-
Save terrah27/ee75513d7573261d19f2251fe385e5b5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove columns based on their percentage of missing values.
def drop_missing_values(dataframe, threshold):
    """Return ``dataframe`` without the columns that are mostly missing.

    Args:
        dataframe: pandas DataFrame to filter. It is not modified in place;
            only the local name is rebound.
        threshold: Fraction of missing values (e.g. ``0.5``) at or above
            which a column is dropped.

    Returns:
        A new DataFrame holding only the columns whose fraction of null
        values is strictly below ``threshold``.

    Side effects:
        Prints the number of dropped columns and the resulting shape.
    """
    # Fraction of nulls per column in one vectorized pass. The result is
    # aligned positionally with the columns, so columns that share a label
    # are each evaluated on their own.
    missing_fraction = dataframe.isnull().mean()
    drop_mask = (missing_fraction >= threshold).to_numpy()
    print('Columns to drop: ', int(drop_mask.sum()))

    # Keep the surviving columns by position rather than by label, so a
    # label shared by several columns cannot remove a well-populated column
    # together with a sparse one.
    dataframe = dataframe.loc[:, ~drop_mask]
    print('Shape: ', dataframe.shape)
    return dataframe
df_threshold = drop_missing_values(df, .5)
# Output:
#   Columns to drop: 41
#   Shape: (307511, 81)

# get updated missing value info
get_missing_values_info(df_threshold).head(20)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment