Skip to content

Instantly share code, notes, and snippets.

@indygreg
Last active September 23, 2015 14:57
Show Gist options
  • Save indygreg/bc2d5e8cf72c5bbd4987 to your computer and use it in GitHub Desktop.
Save indygreg/bc2d5e8cf72c5bbd4987 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python2.7
import collections
import csv
import re
import sys
#import plotly.plotly as py
#from plotly.graph_objs import *
from bokeh.charts import Bar, output_file, show, save
from bokeh.embed import components
# Matches text of the form ``<question>[<choice>]`` and captures the part
# before the opening bracket and the text inside the brackets. Written as a
# raw string so the ``\[`` / ``\]`` regex escapes do not depend on Python
# passing unrecognised string escapes through unchanged.
RE_QUESTION_CHOICE = re.compile(r'^([^\]]+)\[(.*)\]$')

# Name of the CSV column whose answer is used to group respondents.
TEAM_QUESTION = 'What product do you work on the most?'
# Columns whose cell may hold several answers separated by ';'.
# iterate_answers() splits these cells into individual answers, and
# reduce_answers() measures their frequency against group sizes.
MULTIPLE_CHOICE_QUESTIONS = {
    'What other languages do you work with?',
    'What products do you work on? (Check all that apply)',
    'What would improve your Treeherder experience?',
    'What are your thoughts on MQ?',
    'Which version control tools do you currently use for Mozilla projects?',
    'Which workflows do you practice to author commits with Mercurial? (Check all that apply)',
    'Why do you prefer Git over Mercurial? (check all that apply)',
    'Why do you prefer Mercurial over Git? (Check all that apply)',
    'If you use Git for working on Firefox, how do you obtain the source code? (Choose all that apply)',
    'What version control tools do you use to work with mozilla-central and other project branches (inbound, fx-team, etc)?',
    'How do you prefer to debug test failures?',
    'Which version control tool do you prefer?',
}
# Free-form text columns. counts_by_question() and counts_grouped_by_answer()
# skip these columns entirely, so they are never tallied or charted.
OPEN_TEXT_QUESTIONS = {
    'Please leave any other comments you may have on the Firefox build system and how improvements could affect your productivity.',
    'Other',
    'Please give more info in your answer(s) above',
    "What's your biggest complaint about Mercurial as a general tool?",
    "What's your biggest complaint about Mercurial at Mozilla?",
    'Other feedback about Mercurial',
    "What's your biggest complaint about Git?",
    'Other feedback about Git at Mozilla',
    'In your own words, feel free to tell us more about why you prefer Mercurial over Git.',
    'In your own words, feel free to tell us why you prefer Git over Mercurial.',
    'Before you go, what things did we fail to cover that would provide a boost to your productivity? Please be as specific as possible.',
}
# Maps a group name to the short label plot_by_group() uses as the key for
# that group's data series.
PRODUCT_SHORTNAMES = {
    'Gecko / Platform': 'Platform',
    'Firefox for Desktop (Windows, OS X, Linux)': 'Fx-Team',
    'Firefox OS': 'FxOS',
    'Firefox for Android': 'Fennec',
    'Firefox for iOS': 'iOS',
    'Product Support (automation, tools, infrastructure, etc)': 'Support',
    'Other': 'Other',
    'All Other Responses': 'Other',
}
# Groups that reduce_answers() keeps when it processes grouped counts; every
# other group is left out of its grouped output. The commented-out entries
# are currently excluded.
FILTER_PRODUCTS = {
    'Gecko / Platform',
    'Firefox for Desktop (Windows, OS X, Linux)',
    #'Firefox OS',
    #'Product Support (automation, tools, infrastructure, etc)',
}
# Known answer scales, each listed in the order its answers should be shown.
# sort_answers() uses the first scale that contains every answer it is given.
SORTS = [
    [
        'No',
        'Yes',
    ],
    [
        "This is my first job / I'm not sure",
        'Slower',
        'About the same',
        'Faster',
    ],
    [
        'Not sure',
        'No more productive - already at peak productivity',
        'Up to 10% more productive',
        'Up to 25% more productive',
        'Up to 50% more productive',
        "100%+ more productive (you'd be a machine)",
    ],
    [
        "N/A or I'm not sure",
        'Not at all satisfied',
        "It's not horrible, but it's still pretty bad",
        'It sucks a little',
        "It's neither bad nor good",
        "It's OK, I guess",
        "It's pretty good",
        "It's awesome!",
    ],
    [
        'N/A',
        "It's gotten worse",
        'About the same',
        'Gotten better',
    ],
    [
        'N/A',
        '1 = least impactful',
        '2',
        '3',
        '4 = most impactful',
    ],
    [
        'N/A',
        '1 = little impact',
        '2',
        '3',
        '4 = most impact',
    ],
    [
        'N/A',
        '1 = least impact',
        '2',
        '3',
        '4 = most impact',
    ],
    [
        '1 - no impact',
        '2 - little impact',
        '3 - a fair amount of impact',
        '4 - a significant amount of impact',
        '5 - an extremely frustrating amount of impact',
    ],
    [
        'N/A',
        'Keep about the same',
        'Increase investment',
        'Drastically increase investment',
    ],
    [
        'N/A',
        'Extremely dissatisfied',
        'Below average',
        'Average',
        'Above average',
        "It's awesome!",
    ],
    [
        'No impact / Not applicable',
        'A little impact',
        'Moderate impact',
        'Significant impact',
        'Tons of impact / Implement this ASAP',
    ],
    [
        'No knowledge',
        'I know enough to do just the basics',
        'Fairly competent (I still get stuck from time to time)',
        'Highly competent',
    ],
    [
        'N/A',
        'Once every few months',
        'Monthly',
        'Weekly',
        'Daily',
    ],
    [
        'Never',
        'Over a year ago',
        'A few months ago',
        'A few weeks ago',
        'A few days ago',
    ],
    [
        'N/A',
        'Not satisfied at all',
        'Below average',
        'Average',
        'Above average',
        "It's awesome!",
    ],
    [
        'N/A (not relevant to me)',
        'Cut all investment',
        'Cut some investment',
        'Keep about the same',
        'Increase investment',
        'Drastically increase investment',
    ],
    [
        'N/A',
        'Cut all investment',
        'Decrease investment',
        'Keep about the same',
        'Increase investment',
        'Drastically increase investment',
    ],
]
def read_csv(fh):
    """Parse CSV input into its header row and its data rows.

    Args:
        fh: An iterable of CSV lines, typically an open file object.

    Returns:
        A ``(columns, responses)`` tuple. ``columns`` is the first row as a
        list of strings and ``responses`` is a list of all remaining rows.
        Both are empty lists when the input contains no rows at all.
    """
    reader = csv.reader(fh)
    # The builtin next() works on Python 2.6+ and Python 3, whereas the
    # ``reader.next()`` method only exists on Python 2. The default value
    # keeps empty input from surfacing as a bare StopIteration.
    columns = next(reader, [])
    responses = list(reader)
    return columns, responses
def iterate_answers(column, v):
    """Yield the individual answers contained in one response cell.

    For a column listed in MULTIPLE_CHOICE_QUESTIONS the cell is split on
    ';' and every non-empty piece is yielded with surrounding whitespace
    removed. For any other column the cell value is yielded once, unchanged.
    """
    if column not in MULTIPLE_CHOICE_QUESTIONS:
        yield v
        return

    for piece in v.split(';'):
        choice = piece.strip()
        if choice:
            yield choice
def count_group_size(columns, responses):
    """Count the responses that gave each answer to TEAM_QUESTION.

    Uses the first column named TEAM_QUESTION and returns a
    ``collections.Counter`` mapping that column's value to the number of
    responses carrying it. Returns None when no column has that name.
    """
    team_index = next(
        (idx for idx, name in enumerate(columns) if name == TEAM_QUESTION),
        None)
    if team_index is None:
        return None
    return collections.Counter(row[team_index] for row in responses)
def reduce_answers(columns, counts, group_sizes, ignore=False,
                   min_fraction=0.05, min_count=2):
    """Fold low-frequency answers into an "All Other Responses" bucket.

    Args:
        columns: Column names in order; only those present in ``counts`` are
            processed.
        counts: Mapping of column -> answer counts. Each value is either a
            flat mapping of answer -> count, or a ``collections.defaultdict``
            of answer -> {group: count} (the "grouped" form).
        group_sizes: Mapping of group -> number of respondents. Only read for
            grouped counts of MULTIPLE_CHOICE_QUESTIONS columns, where the
            frequency denominator is the summed size of the FILTER_PRODUCTS
            groups instead of the number of answers given.
        ignore: When true, low-frequency answers are dropped rather than
            merged into "All Other Responses".
        min_fraction: An answer whose share of the total is at or below this
            fraction is treated as low frequency.
        min_count: An answer whose count is at or below this value is treated
            as low frequency.

    Returns:
        A new dict of column -> reduced counts, in the same flat or grouped
        form as the input. Grouped results only contain FILTER_PRODUCTS
        groups.
    """
    new_counts = {}
    for column in columns:
        if column not in counts:
            continue
        v = counts[column]
        grouped = isinstance(v, collections.defaultdict)

        # First pass: per-answer totals and the denominator for the
        # frequency test.
        answer_counts = collections.Counter()
        total = 0
        if grouped:
            for a, group_counts in v.items():
                for group, c in group_counts.items():
                    if group not in FILTER_PRODUCTS:
                        continue
                    answer_counts[a] += c
                    total += c
            if column in MULTIPLE_CHOICE_QUESTIONS:
                # Several answers can come from one response, so measure
                # against the number of respondents in the kept groups.
                total = sum(c for group, c in group_sizes.items()
                            if group in FILTER_PRODUCTS)
        else:
            for a, c in v.items():
                answer_counts[a] += c
                total += c

        to_filter = set(
            a for a, c in answer_counts.items()
            if float(c) / float(total) <= min_fraction or c <= min_count)

        # Second pass: copy the counts, renaming or dropping filtered answers.
        if grouped:
            reduced = collections.defaultdict(collections.Counter)
            for a, group_counts in v.items():
                if a in to_filter:
                    if ignore:
                        continue
                    a = 'All Other Responses'
                for group, c in group_counts.items():
                    if group not in FILTER_PRODUCTS:
                        continue
                    reduced[a][group] += c
        else:
            reduced = collections.Counter()
            for a, c in v.items():
                if a in to_filter:
                    if ignore:
                        continue
                    a = 'All Other Responses'
                reduced[a] += c
        new_counts[column] = reduced
    return new_counts
def counts_by_question(columns, responses):
    """Tally how often each answer was given, per question.

    The first two columns and every column in OPEN_TEXT_QUESTIONS are
    skipped. Empty cells are ignored; non-empty cells are expanded into
    individual answers with iterate_answers().

    Returns a dict mapping column name -> Counter of answer -> occurrences.
    """
    counts = {}
    for idx, column in enumerate(columns):
        if idx < 2 or column in OPEN_TEXT_QUESTIONS:
            continue
        tally = counts[column] = collections.Counter()
        for row in responses:
            cell = row[idx]
            if cell:
                tally.update(iterate_answers(column, cell))
    return counts
def counts_grouped_by_answer(columns, responses, question):
    """Tally answers per question, broken down by the answer to ``question``.

    The first two columns, the ``question`` column itself and every column in
    OPEN_TEXT_QUESTIONS are skipped. Empty cells are ignored; non-empty cells
    are expanded into individual answers with iterate_answers().

    Returns a dict mapping column name -> defaultdict of
    answer -> Counter of group -> occurrences, where the group is the
    response's value in the ``question`` column. Raises ValueError (from
    ``list.index``) when ``question`` is not one of ``columns``.
    """
    group_index = columns.index(question)
    counts = {}
    for idx, column in enumerate(columns):
        if idx < 2 or idx == group_index or column in OPEN_TEXT_QUESTIONS:
            continue
        by_answer = counts[column] = collections.defaultdict(collections.Counter)
        for row in responses:
            cell = row[idx]
            if not cell:
                continue
            group = row[group_index]
            for answer in iterate_answers(column, cell):
                by_answer[answer][group] += 1
    return counts
def sort_answers(answers, sorts=None):
    """Return the given answers in their proper display order.

    Args:
        answers: Any iterable of answer strings. It is materialised once, so
            one-shot iterators such as generators are handled correctly.
        sorts: Optional list of known answer scales, each an ordered list of
            answers. Defaults to the module-level SORTS.

    Returns:
        A list of the answers. If some scale contains every answer (not
        counting 'All Other Responses'), the first such scale dictates the
        order, with 'All Other Responses' placed first when present.
        Otherwise the answers are returned in plain sorted order.
    """
    if sorts is None:
        sorts = SORTS

    answers = list(answers)
    present = set(answers)
    other = 'All Other Responses'
    # Computed once: it does not depend on which scale is being tried.
    relevant = present - {other}

    for scale in sorts:
        if all(a in scale for a in relevant):
            ordered = list(scale)
            if other in present:
                ordered.insert(0, other)
            return [a for a in ordered if a in present]

    return sorted(answers)
def plot_by_group(columns, answers, group_sizes):
    """Build one bar chart per question and return its embeddable pieces.

    Args:
        columns: Column names in order; only those present in ``answers``
            are charted.
        answers: Mapping of column -> answer -> {group: count}.
        group_sizes: Mapping of group -> number of respondents, used as the
            denominator for each percentage.

    Returns:
        The ``(script, divs)`` pair produced by ``components()`` for the
        list of charts.
    """
    plots = []
    for column in columns:
        if column not in answers:
            continue
        by_answer = answers[column]

        # Every group that appears under at least one answer to this question.
        groups = set()
        for group_counts in by_answer.values():
            groups.update(group_counts)

        x = sort_answers(set(by_answer.keys()))

        data = {}
        for group in sorted(groups):
            y = []
            for answer in x:
                group_counts = by_answer[answer]
                if group in group_counts:
                    percent = float(group_counts[group]) / float(group_sizes[group])
                    # int() truncates, so values are whole percentages.
                    y.append(int(percent * 100.0))
                else:
                    y.append(0)
            data[PRODUCT_SHORTNAMES[group]] = y

        chart = Bar(
            data,
            cat=x,
            title=column,
            ylabel='Percentage',
            width=1280,
            height=560,
            legend=True,
            tools='',
        )
        chart.title_text_font_size = '13px'
        plots.append(chart)

    script, divs = components(plots)
    return script, divs
def write_html(fh, script, divs):
    """Write a complete HTML page embedding the charts to ``fh``.

    Args:
        fh: Writable file-like object.
        script: Markup written inside ``<head>`` after the stylesheet and
            script tags.
        divs: Iterable of markup strings written, in order, inside
            ``<body>``.
    """
    head_parts = (
        '<html><head><title>Survey Results</title>',
        '<link href="https://people.mozilla.org/~gszorc/bokeh-0.9.3.min.css" rel="stylesheet" type="text/css" />',
        '<script src="https://people.mozilla.org/~gszorc/bokeh-0.9.3.min.js"></script>',
        script,
        '</head><body>\n',
    )
    for part in head_parts:
        fh.write(part)
    for div in divs:
        fh.write(div)
    fh.write('</body></html>')
if __name__ == '__main__':
    # Usage: <script> RESPONSES.csv OUTPUT.html
    with open(sys.argv[1], 'rb') as csv_fh:
        columns, responses = read_csv(csv_fh)
    outfile = sys.argv[2]

    # Print the number of respondents in each group.
    group_sizes = count_group_size(columns, responses)
    for group in sorted(group_sizes):
        print('N=%d\t%s' % (group_sizes[group], group))

    # Ungrouped tallies; computed but not used by the charts below.
    by_question = counts_by_question(columns, responses)
    #by_question = reduce_answers(columns, by_question, group_sizes, ignore=True)

    # Tallies broken down by TEAM_QUESTION, with rare answers merged.
    by_group = counts_grouped_by_answer(columns, responses, TEAM_QUESTION)
    by_group_reduced = reduce_answers(columns, by_group, group_sizes, ignore=False)

    script, divs = plot_by_group(columns, by_group_reduced, group_sizes)
    with open(outfile, 'wb') as html_fh:
        write_html(html_fh, script, divs)
This file has been truncated, but you can view the full file.
<html><head><title>Survey Results</title><link href="https://people.mozilla.org/~gszorc/bokeh-0.9.3.min.css" rel="stylesheet" type="text/css" /><script src="https://people.mozilla.org/~gszorc/bokeh-0.9.3.min.js"></script><script type="text/javascript">
Bokeh.$(function() {
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment