Skip to content

Instantly share code, notes, and snippets.

@terrah27
Last active March 12, 2022 00:51
Show Gist options
  • Save terrah27/1b541b4585249d3d504cc4cba2fcf55f to your computer and use it in GitHub Desktop.
Save terrah27/1b541b4585249d3d504cc4cba2fcf55f to your computer and use it in GitHub Desktop.
def get_missing_values_info(df):
    """Summarize missing values per column and print column-level totals.

    Prints three lines: the number of columns that contain at least one
    missing value, the total number of columns, and the percentage of
    columns affected.

    Args:
        df: pandas DataFrame to inspect.

    Returns:
        A DataFrame indexed by the column names of ``df`` with two columns:
        'Count Missing' (number of null entries) and 'Percent Missing'
        (that count as a percentage of the row count, rounded to one
        decimal), sorted by 'Percent Missing' in descending order.
    """
    n_rows, n_cols = df.shape

    # Null entries per column; computed once and reused below.
    count_missing = df.isnull().sum()

    # Share of rows that are missing, per column. The float cast keeps
    # round() on a numeric dtype even when the frame has no columns.
    percent_missing = (100 * count_missing / n_rows).astype(float).round(1)

    missing_df = pd.DataFrame(
        {
            'Count Missing': count_missing,
            'Percent Missing': percent_missing,
        }
    )
    missing_df = missing_df.sort_values('Percent Missing', ascending=False)

    # A column "has missing values" exactly when its null count is positive.
    cols_missing_vals = int((count_missing > 0).sum())

    # Guard the ratio: a frame without columns would otherwise divide by zero.
    if n_cols:
        percent_cols_missing = round(cols_missing_vals / n_cols * 100, 1)
    else:
        percent_cols_missing = 0.0

    print(f'Columns With Missing Values: {cols_missing_vals}')
    print(f'Total Columns: {n_cols}')
    print(f'Percent of Columns with Missing Values: {percent_cols_missing}%')
    return missing_df
# Show the first 20 rows of the missing-value summary for `df`.
# NOTE(review): `df` is not defined in this snippet — presumably a DataFrame
# loaded earlier (e.g. in a previous notebook cell); confirm before running.
get_missing_values_info(df).head(20)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment