Skip to content

Instantly share code, notes, and snippets.

@Antrikshy
Created June 5, 2019 05:31
Show Gist options
  • Save Antrikshy/e117f072192357c7e4d01df9bfb47737 to your computer and use it in GitHub Desktop.
Save Antrikshy/e117f072192357c7e4d01df9bfb47737 to your computer and use it in GitHub Desktop.
Script to re-process Stack Overflow Developer Survey 2019 data to get tech popularity stats by self-reported developer type
import csv
from collections import Counter
# Per-developer-type tallies, filled in by main():
# {dev_type: Counter({technology: number of mentions})}.
have_tally = {}
want_tally = {}
# Get the dataset at insights.stackoverflow.com/survey
path_to_datum = '/path/to/downloaded/developer_survey_2019/survey_results_public.csv'
# This *should* work without too much modification with SO survey datasets from other
# years, but might require changing the names of columns.
DEV_TYPE_COLUMN = 'DevType'
HAVE_COLUMNS = (
    'LanguageWorkedWith',
    'WebFrameWorkedWith',
    'DatabaseWorkedWith',
    'PlatformWorkedWith',
    'MiscTechWorkedWith',
)
WANT_COLUMNS = (
    'LanguageDesireNextYear',
    'WebFrameDesireNextYear',
    'DatabaseDesireNextYear',
    'PlatformDesireNextYear',
    'MiscTechDesireNextYear',
)


def split_answers(row, columns):
    """Collect the answers from several multiple-choice columns of one row.

    Each cell holds ';'-separated answers. 'NA' placeholders and blank
    answers are dropped; answers are returned in column order and an answer
    that appears in more than one column is returned once per occurrence.

    Args:
        row: Mapping of column name to cell text (e.g. a csv.DictReader row).
        columns: Iterable of column names to read.

    Returns:
        A list of answer strings.

    Raises:
        KeyError: If a column is missing from the row, which usually means
            the column names need adjusting for this year's dataset.
    """
    answers = []
    for column in columns:
        # csv.DictReader fills the missing cells of a short row with None.
        cell = row[column] or ''
        answers.extend(
            answer for answer in cell.split(';') if answer and answer != 'NA'
        )
    return answers


def tally_rows(rows, have=None, want=None):
    """Count technology mentions per self-reported developer type.

    Every technology a respondent lists is credited once to each of the
    developer types that respondent selected. A developer type gets an
    entry in both tallies even when the respondent listed no technologies.

    Args:
        rows: Iterable of survey rows (mappings keyed by column name).
        have: Optional dict to accumulate "worked with" counts into.
        want: Optional dict to accumulate "desire next year" counts into.

    Returns:
        A (have, want) tuple of dicts mapping developer type to a Counter
        of technologies. These are the dicts passed in, when given.
    """
    have = {} if have is None else have
    want = {} if want is None else want
    for row in rows:
        dev_types = split_answers(row, (DEV_TYPE_COLUMN,))
        have_techs = split_answers(row, HAVE_COLUMNS)
        want_techs = split_answers(row, WANT_COLUMNS)
        for dev_type in dev_types:
            have.setdefault(dev_type, Counter()).update(have_techs)
            want.setdefault(dev_type, Counter()).update(want_techs)
    return have, want


def report_lines(heading, tally, limit=15):
    """Yield the strings to print for one section of the report.

    Args:
        heading: Section banner, yielded first and unchanged.
        tally: Dict mapping developer type to a Counter of technologies.
        limit: How many of the most common technologies to list per type.

    Yields:
        The banner, then for each developer type its name, one
        'tech | count' line per technology, and a blank separator.
    """
    yield heading
    for dev_type, counts in tally.items():
        yield dev_type + '\n'
        for tech, count in counts.most_common(limit):
            yield tech + ' | ' + str(count)
        yield '\n'


def main(path=None):
    """Read the survey CSV and print the "Haves" and "Wants" reports.

    Args:
        path: Location of survey_results_public.csv; defaults to the
            module-level path_to_datum.
    """
    if path is None:
        path = path_to_datum
    # newline='' is what the csv module asks for so that quoted fields with
    # embedded newlines parse correctly.
    # NOTE(review): assumes the published CSV is UTF-8 - confirm for other years.
    with open(path, 'r', newline='', encoding='utf-8') as datum:
        tally_rows(csv.DictReader(datum), have_tally, want_tally)
    sections = (
        ('====== Haves ======\n', have_tally),
        ('====== Wants ======\n', want_tally),
    )
    for heading, tally in sections:
        for line in report_lines(heading, tally):
            print(line)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment