Skip to content

Instantly share code, notes, and snippets.

@John-K
Created March 29, 2020 19:52
Show Gist options
  • Save John-K/4244c42c6cfb986857e66b77597ca18c to your computer and use it in GitHub Desktop.
Save John-K/4244c42c6cfb986857e66b77597ca18c to your computer and use it in GitHub Desktop.
Parse NYT COVID data into Python dictionaries
#!/usr/bin/env python
# run from the NYT data directory:
# python covid.py
import csv
import pprint
# data structures
# by_date:  maps each 'date' value to a list of that day's rows, each row
#           being a dict of the remaining columns (everything but 'date').
# by_state: maps each 'state' value to a list of that state's rows, each row
#           being a dict of the remaining columns (everything but 'state').
by_date = {}
by_state = {}

# use prettyprint so anything we print looks nice
pp = pprint.PrettyPrinter(indent=4)

# Load the data into two maps, one organized by date, the other by state.
# newline='' is what the csv module expects for files it parses; the encoding
# is pinned so the result does not depend on the platform's locale default.
with open('us-states.csv', newline='', encoding='utf-8') as csvfile:
    # DictReader uses the first row as data labels
    reader = csv.DictReader(csvfile)
    for line in reader:
        # Drop only the grouping column itself. An exact comparison (rather
        # than a prefix test) keeps unrelated columns whose names merely
        # begin with 'date' or 'state', and tolerates the None key that
        # DictReader uses for surplus fields in an over-long row.
        no_date = {k: v for k, v in line.items() if k != 'date'}
        no_state = {k: v for k, v in line.items() if k != 'state'}

        # create the first record or append to an existing one
        by_date.setdefault(line['date'], []).append(no_date)
        by_state.setdefault(line['state'], []).append(no_state)

# print data structures to see what's in them
pp.pprint(by_state)
pp.pprint(by_date)
@John-K
Copy link
Author

John-K commented Mar 29, 2020

to get all entries for a given date:
pp.pprint(by_date['2020-03-03'])

to get all entries (one per date) for a given state:
pp.pprint(by_state['California'])

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment