Skip to content

Instantly share code, notes, and snippets.

@jwhitlock
Created June 27, 2017 21:25
Show Gist options
  • Save jwhitlock/e8d324821bb68358799529e2b6ebab42 to your computer and use it in GitHub Desktop.
Save jwhitlock/e8d324821bb68358799529e2b6ebab42 to your computer and use it in GitHub Desktop.
Analyze PR merges for mozilla/kuma
#!/usr/bin/env python
from datetime import date, datetime, timedelta, tzinfo
import argparse
import requests
import requests_cache
import csv
import pprint
# "Today", evaluated once at import time; used as the end date for staff
# members who are still active.
current = date.today()

# MDN full-time dev staff, with approx start and end, and "full-time" factor.
# Each entry is (GitHub username, start date, end date, factor); the factor
# is the weight the person adds to a month's staff count when their
# start/end range overlaps that month.
dev_staff = (
    ('darkwing', date(2012, 4, 24), date(2015, 12, 15), 1),
    ('escattone', date(2016, 9, 26), current, 1),
    ('groovecoder', date(2011, 2, 15), date(2015, 12, 31), 1),
    ('jezdez', date(2013, 7, 1), date(2016, 3, 31), 1),
    ('jpetto', date(2017, 1, 1), date(2017, 3, 31), 1),
    ('jwhitlock', date(2015, 7, 1), current, 1),
    ('lmorchard', date(2011, 7, 12), date(2014, 5, 5), 1),
    # The start and end dates were previously swapped for this entry, which
    # made the range empty so the person was never counted in any month.
    ('openjck', date(2012, 3, 26), date(2016, 1, 15), 1),
    ('robhudson', date(2014, 3, 7), date(2016, 3, 31), 1),
    ('schalkneethling', date(2017, 4, 1), current, 1),
    ('stephaniehobson', date(2014, 6, 9), current, 1),
    ('ubernostrum', date(2011, 8, 22), date(2015, 6, 1), 1),
    ('willkg', date(2016, 1, 1), date(2016, 3, 31), 1),
    ('dchukhin', date(2016, 4, 1), date(2016, 7, 1), .25),
    ('emullaney', date(2016, 4, 1), date(2016, 7, 1), .25),
    # NOTE(review): factor 0 means this entry never contributes to the staff
    # count, while the two entries with the same dates use .25 -- confirm
    # whether 0 is intentional.
    ('jbradberry', date(2016, 4, 1), date(2016, 7, 1), 0),
    ('jsocol', date(2010, 2, 1), date(2013, 4, 5), 1),
)
# GitHub usernames treated as staff in addition to the dev staff table
# (they are unioned with the dev staff usernames to form ``staff``).
other_staff = {
    'Elchi3',
    'JeremiePat',
    'Osmose',
    'Sheeri',
    'a2sheppy',
    'bensternthal',
    'chrisdavidmills',
    'davehunt',
    'escattone',
    'glogiotatidis',
    'hoosteeno',
    'jgmize',
    'lonnen',
    'metadave',
    'teoli2003',
    'wbamberg',
}
# Every username considered staff: the first field of each dev_staff entry
# plus everyone listed in other_staff.
staff = {entry[0] for entry in dev_staff} | other_staff
def user_is_staff(username):
    """Return True if ``username`` is a member of the module-level ``staff`` set."""
    is_member = username in staff
    return is_member
class UTC(tzinfo):
    """tzinfo implementation for UTC: zero offset, zero DST, name "UTC"."""

    # Shared zero-length timedelta returned for both the offset and DST.
    ZERO = timedelta(0)

    def tzname(self, dt):
        """Return the time zone name, always "UTC"."""
        return "UTC"

    def utcoffset(self, dt):
        """Return the offset from UTC, always zero."""
        return self.ZERO

    def dst(self, dt):
        """Return the daylight-saving adjustment, always zero."""
        return self.ZERO


# Module-level instance of the UTC tzinfo.
utc = UTC()
def to_datetime(raw_date_str):
    """Parse a ``YYYY-MM-DDTHH:MM:SS`` timestamp into a naive datetime.

    A single trailing ``Z`` is dropped before parsing. ``None`` is passed
    through as an empty string rather than a datetime.

    Raises ValueError (from ``datetime.strptime``) if the text does not match
    the expected format.
    """
    if raw_date_str is None:
        return ''
    if raw_date_str.endswith('Z'):
        trimmed = raw_date_str[:-1]
    else:
        trimmed = raw_date_str
    return datetime.strptime(trimmed, '%Y-%m-%dT%H:%M:%S')
def merged_pull_requests(owner, repo, client_id, client_secret, state='open', page=1):
    """Fetch one page of a repository's pull requests and keep the merged ones.

    Parameters:
        owner, repo: identify the repository in the GitHub API URL.
        client_id, client_secret: GitHub application credentials. They are
            sent with HTTP basic auth rather than as query parameters, so
            they no longer appear in the request URL printed below.
        state: pull request state filter; only sent when it is not 'open'.
        page: 1-based page number; only sent when greater than 1.

    Returns a list of dicts, one per pull request on the page that has a
    truthy ``merged_at``, with the keys number, username, title, merged_at,
    created_at, is_staff, created_month, merged_month, secs_open and
    days_open.

    Raises Exception (carrying the response text) when the response JSON is
    an object with a 'message' key, which is presumably how the API reports
    errors.
    """
    url = 'https://api.github.com/repos/%(owner)s/%(repo)s/pulls' % {
        'owner': owner,
        'repo': repo,
    }
    payload = {}
    if page > 1:
        payload['page'] = str(page)
    if state != 'open':
        payload['state'] = state
    resp = requests.get(url, params=payload, auth=(client_id, client_secret))
    print(resp.url)

    # Parse the body once and reuse it for both the error check and the loop.
    out = resp.json()
    if isinstance(out, dict) and 'message' in out:
        pprint.pprint(out)
        raise Exception(resp.text)

    prs = []
    for pr in out:
        if not pr['merged_at']:
            # Closed without being merged (or still open): not of interest.
            continue
        merged_at = to_datetime(pr['merged_at'])
        created_at = to_datetime(pr['created_at'])
        open_for = merged_at - created_at
        username = pr['user']['login']
        prs.append({
            'number': pr['number'],
            'username': username,
            'title': pr['title'],
            'merged_at': merged_at,
            'created_at': created_at,
            'is_staff': 1 if user_is_staff(username) else 0,
            'created_month': created_at.strftime('%Y-%m'),
            'merged_month': merged_at.strftime('%Y-%m'),
            'secs_open': open_for.total_seconds(),
            'days_open': open_for.days,
        })
    return prs
# Field order of each pull-request row; also the header row of github.csv.
columns = [
    # Identity
    'number', 'username', 'is_staff', 'title',
    # Creation
    'created_at', 'created_month',
    # Merge
    'merged_at', 'merged_month',
    # Time between creation and merge
    'secs_open', 'days_open',
]
def pull_requests_merged_in_range(owner, repo, client_id, client_secret, start):
    """Collect merged pull requests created on or after ``start``.

    Pages through the repository's closed pull requests with
    ``merged_pull_requests`` and stops after the first page that contains a
    pull request created before ``start`` (a ``datetime.date``).

    Returns a list of tuples, one per pull request, holding the fields named
    in ``columns`` converted to UTF-8 encoded strings, ordered by ascending
    pull request number.
    """
    selected = []
    page = 0
    done = False
    while not done:
        page += 1
        prs = merged_pull_requests(owner, repo, client_id, client_secret, 'closed', page)
        if not prs:
            # Nothing merged on this page. Treat that as the end of the
            # listing; otherwise a ``start`` older than every pull request
            # would keep requesting pages forever.
            # NOTE(review): a page consisting solely of closed-but-unmerged
            # pull requests would also stop the scan early -- confirm this
            # is acceptable for the repository being analyzed.
            break
        for pr in prs:
            if pr['created_at'].date() < start:
                # Finish the current page, then stop paging.
                done = True
            else:
                selected.append(pr)

    # Sort on the numeric PR number before the values are stringified;
    # sorting the encoded strings would put '10' ahead of '9'.
    selected.sort(key=lambda pr: pr['number'])
    # ``unicode`` is the Python 2 text type; this script targets Python 2.
    return [
        tuple(unicode(pr[item]).encode('utf8') for item in columns)
        for pr in selected
    ]
# Header row for prs_by_month.csv; the order matches the tuples built by
# pull_requests_by_month.
by_month_columns = [
    'month',
    'staff',
    'prs',
    'by_staff',
    'by_other',
    'avg_secs_open',
    'avg_days_open',
    'prs_over_staff',  # was misspelled 'prs_over_stafff'
    'avg_secs_open_over_staff',
    'avg_days_open_over_staff',
]
def pull_requests_by_month(prs):
    """Aggregate pull-request rows into one summary row per merge month.

    ``prs`` is an iterable of rows whose fields are ordered as in
    ``columns``; the values are expected to be strings (the staff flag is
    compared against ``'1'`` and the durations are converted with
    ``float``).

    Returns a list of tuples sorted by month, each holding: month, weighted
    staff count, total PRs, PRs by staff, PRs by others, average seconds
    open, average days open, and then PRs, average seconds and average days
    each divided by the staff count. The three per-staff values are ``None``
    when the staff count for the month is zero.
    """
    by_month = {}
    for pr in prs:
        pr_dict = dict(zip(columns, pr))
        by_month.setdefault(pr_dict['merged_month'], []).append(pr_dict)

    by_month_rows = []
    for month in sorted(by_month):
        # The month spans [start_date, end_date), where end_date is the
        # first day of the following month.
        raw_year, raw_month = month.split('-')
        start_date = date(int(raw_year), int(raw_month), 1)
        if start_date.month == 12:
            end_date = date(start_date.year + 1, 1, 1)
        else:
            end_date = date(start_date.year, start_date.month + 1, 1)

        # How many full time staff members? Everyone whose start/end range
        # overlaps the month contributes their factor.
        staff_count = 0.0
        for _username, staff_start, staff_end, factor in dev_staff:
            if staff_start < end_date and staff_end >= start_date:
                staff_count += factor

        # Aggregate other data
        month_prs = by_month[month]
        total = len(month_prs)
        by_staff = sum(1 for pr_dict in month_prs if pr_dict['is_staff'] == '1')
        by_other = total - by_staff
        days = sum((float(pr_dict['days_open']) for pr_dict in month_prs), 0.0)
        secs = sum((float(pr_dict['secs_open']) for pr_dict in month_prs), 0.0)
        # total is at least 1: a month only exists here if it has a PR.
        avg_days = days / float(total)
        avg_secs = secs / float(total)

        if staff_count:
            prs_over_staff = float(total) / staff_count
            avg_days_over_staff = avg_days / staff_count
            avg_secs_over_staff = avg_secs / staff_count
        else:
            # No staff in this month: the per-staff ratios are undefined.
            # Use None instead of raising ZeroDivisionError.
            prs_over_staff = None
            avg_days_over_staff = None
            avg_secs_over_staff = None

        by_month_rows.append((
            month,
            staff_count,
            total,
            by_staff,
            by_other,
            avg_secs,
            avg_days,
            prs_over_staff,
            avg_secs_over_staff,
            avg_days_over_staff,
        ))
    return by_month_rows
def debug_staff(prs):
    """Print the usernames seen in ``prs``, split into staff and non-staff.

    Each row is indexed positionally: index 1 is the username and index 2 is
    the staff flag, which counts as staff only when it equals the string
    ``'1'``. Both name lists are printed sorted.
    """
    names_by_flag = {True: set(), False: set()}
    for row in prs:
        names_by_flag[row[2] == '1'].add(row[1])
    print("Staff users:")
    pprint.pprint(sorted(names_by_flag[True]))
    print("\nNon-staff users:")
    pprint.pprint(sorted(names_by_flag[False]))
def valid_date(raw_date):
    """Convert a ``YYYY-MM-DD`` string to a ``datetime.date``.

    Intended as an argparse ``type`` callable: text that does not parse is
    reported by raising ``argparse.ArgumentTypeError``.
    """
    try:
        parsed = datetime.strptime(raw_date, "%Y-%m-%d")
    except ValueError:
        raise argparse.ArgumentTypeError(
            "Not a valid date: '{0}'.".format(raw_date))
    return parsed.date()
def get_args():
    """Parse the command line and return the argparse namespace.

    Positional arguments: ``client_id`` and ``client_secret``.
    Options: ``--start`` (converted with ``valid_date``) and ``--cachefile``.
    """
    # The description previously read 'Process some integers.', which does
    # not describe this script.
    parser = argparse.ArgumentParser(
        description='Analyze merged pull requests for mozilla/kuma.')
    parser.add_argument('client_id', type=str, help='GitHub client ID')
    parser.add_argument('client_secret', type=str, help='GitHub client secret')
    # argparse runs string defaults through ``type``, so the default below
    # is also converted by valid_date.
    parser.add_argument('--start', type=valid_date,
                        help='start date, YYYY-MM-DD, default 2014-01-01',
                        default='2014-01-01')
    parser.add_argument('--cachefile', type=str,
                        help='cache file for GitHub requests',
                        default='github_cache')
    return parser.parse_args()
if __name__ == '__main__':
    # Script entry point: fetch the merged pull requests, print the
    # staff/non-staff username split, then write the per-PR and per-month
    # CSV files into the current directory.
    org = 'mozilla'
    repo = 'kuma'
    args = get_args()
    # Install the requests cache under the name given by --cachefile.
    requests_cache.install_cache(args.cachefile)
    prs = pull_requests_merged_in_range(org, repo, args.client_id,
                                        args.client_secret, args.start)
    debug_staff(prs)

    # open() replaces the file() builtin. The files stay in binary mode,
    # which is what the Python 2 csv module expects.
    with open('github.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(columns)
        writer.writerows(prs)

    by_month = pull_requests_by_month(prs)
    with open('prs_by_month.csv', 'wb') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(by_month_columns)
        writer.writerows(by_month)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment