Last active
August 24, 2022 01:20
-
-
Save jennynz/6a29ab14da932b114a7328fb0f1ae096 to your computer and use it in GitHub Desktop.
Datavis for how long contributors wait for a review on open source projects
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Given a dataframe called `df` which you've wrangled to have the columns:
# - org (str)
# - year (int or float)
# - bin (str) (e.g. Under 1 day, 1 day to 1 week)
# - percent (float)
# Here's the matplotlib code to generate the datavis in this blog post:
# https://levelup.gitconnected.com/how-does-pr-review-wait-time-affect-your-open-source-project-d79bd0af0ea3
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Draws one stacked bar chart per organisation (one bar per year, one stacked
# segment per wait-time bin) and stacks those charts vertically in one figure.
#
# NOTE: besides `df`, this script uses two names it does not define; they must
# exist before it runs:
#   - bins: the bin labels, in stacking order (first entry is drawn at the bottom)
#   - BIN_COLOURS: colours indexed in the same order as `bins`

# "Plus Jakarta Sans" open source font here: https://github.com/tokotype/PlusJakartaSans/releases
sns.set(style="whitegrid", font_scale=1.4, font="Plus Jakarta Sans")

# Row labels written down the left-hand side of the figure.
# NOTE(review): presumably listed in the same order as df['org'].unique() so
# that each label lines up with its chart -- verify against your data.
org_repos_anonymised = [
    'facebook/react',
    'Ruby framework 1 (anonymised)',
    'Ruby framework 2 (anonymised)',
    'tensorflow/tensorflow',
    'travis-ci (various repos)',
    'vuejs/vue'
]

SPINE_SIDES = ('bottom', 'top', 'right', 'left')

fig = plt.figure(figsize=(9.5, 13))
orgs = df['org'].unique()
# One axes per org, plus one extra empty axes used as bottom padding (see below).
axs = fig.subplots(len(orgs) + 1, 1)

# x positions of the bars. Each (org, bin) group is expected to contribute
# exactly one `percent` value per year listed here.
years = list(range(2017, 2022))
BAR_WIDTH = 1

# Group on the documented `org` column (the same column used to build `orgs`).
grouped = df.groupby(['org', 'bin'])

for i, org in enumerate(orgs):
    ax = axs[i]

    # Running top edge of the stack for each year; the next bin is drawn on top of it.
    current_height = [0] * len(years)
    for j, bin_label in enumerate(bins):
        series_vals = grouped.get_group((org, bin_label)).sort_values('year')['percent'].values
        ax.bar(x=years, width=BAR_WIDTH, bottom=current_height, height=series_vals, color=BIN_COLOURS[j])
        # Element-wise sum (the result is a NumPy array, not an extended list).
        current_height = current_height + series_vals

    # Strip the chart down to just the bars.
    ax.grid(axis='x', visible=False)
    for side in SPINE_SIDES:
        ax.spines[side].set_visible(False)
    ax.set_ylim([0, 1])
    ax.set_yticks([])
    ax.set_xticks([])

# Super hacky way to make the bottom of the figure a bit bigger so that the bottommost org name doesn't get cut off
axs[-1].grid(visible=False)
axs[-1].set_xticks([])
axs[-1].set_yticks([])
for side in SPINE_SIDES:
    axs[-1].spines[side].set_visible(False)

# Add title and axis names
axs[0].set_title("How long do contributors wait for a review?", loc='left', x=0.045, pad=150, fontsize=24)

# Legend
fig.text(0.06, 0.945, '% of contributors with a median review wait time of...', ha='left', va='top', size=16)
for j, bin_label in enumerate(bins):
    # Empty line plots exist only to create one legend entry per bin.
    axs[0].plot([], [], color=BIN_COLOURS[j], lw=12, label=bin_label)
axs[0].legend(loc='lower left', bbox_to_anchor=(0.04, 1.5), frameon=False, ncol=3)

# Year labels are shown once, above the topmost chart.
axs[0].xaxis.tick_top()
axs[0].set_xticks(years)
axs[0].tick_params(axis="x", length=0, pad=10)

# Row labels are placed in figure coordinates; the 0.73 start and 0.12 step
# are hard-coded offsets for the six labels listed above.
for i, org in enumerate(org_repos_anonymised):
    fig.text(0.058, 0.73 - i * 0.12, org, ha='left', va='top', size=14)

plt.tight_layout(h_pad=4.5)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment