Last active
March 12, 2022 00:51
-
-
Save terrah27/1b541b4585249d3d504cc4cba2fcf55f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create a function to show missing value info
def get_missing_values_info(df):
    """Summarize missing values per column and print column-level totals.

    Prints the number of columns containing at least one missing value, the
    total number of columns, and the percentage of columns affected.

    Args:
        df: pandas DataFrame to inspect.

    Returns:
        A DataFrame indexed by the columns of ``df`` with two columns,
        'Count Missing' and 'Percent Missing' (percent of rows, rounded to
        one decimal place), sorted by 'Percent Missing' in descending order.
    """
    # find missing values in each column
    count_missing = df.isnull().sum()
    # get missing values as percent of the number of rows
    percent_missing = (100 * count_missing / df.shape[0]).round(1)

    # make dataframe with the results
    missing_df = pd.DataFrame({'Count Missing': count_missing,
                               'Percent Missing': percent_missing})
    # sort values by percent missing
    missing_df = missing_df.sort_values('Percent Missing', ascending=False)

    # A column has missing values exactly when its missing count is non-zero,
    # so the null mask does not need to be computed a second time.
    cols_missing_vals = int((count_missing > 0).sum())
    df_cols = df.shape[1]
    # Guard the ratio: a frame with no columns would otherwise raise
    # ZeroDivisionError here.
    if df_cols:
        percent_cols = round(cols_missing_vals / df_cols * 100, 1)
    else:
        percent_cols = 0.0

    print(f'Columns With Missing Values: {cols_missing_vals}')
    print(f'Total Columns: {df_cols}')
    print(f'Percent of Columns with Missing Values: {percent_cols}%')
    return missing_df
# show the 20 columns with the highest percentage of missing values
get_missing_values_info(df).head(20)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment