Last active
April 20, 2023 02:14
-
-
Save liquidgenius/0e32e5230535547fa175ba2b9d7a2dff to your computer and use it in GitHub Desktop.
NYTElectionData: Downloads the publicly available voting data that the New York Times streamed in real time during the 2020 elections. The data is in JSON format. Inspired by the blog post: https://justamom.code.blog/2021/07/15/counting-votes/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
__license__ = "MIT" | |
__version__ = "0.1" | |
__py_version__ = "3.8.7" | |
__status__ = "Development" | |
import json | |
from pathlib import Path | |
from time import sleep | |
import requests | |
import us | |
from backpack import Collection | |
class NYTElectionDataDownloader:
    """
    Downloads publicly available data from New York Times which was streamed in real-time during
    the 2020 elections. Data is in JSON format.

    Instantiating the class performs the whole download (and, by default, the save to disk).
    Network and HTTP errors are not handled here; they propagate to the caller as ``requests``
    exceptions.

    Inspired by blog post: https://justamom.code.blog/2021/07/15/counting-votes/

    Required third party modules:
        pip install us backpack

    Usage
    -----
    # download all states' data from 2020-11-03
    NYTElectionDataDownloader()

    # download a list of specific states from 2020-11-03
    target_states = ['florida', 'arizona']
    NYTElectionDataDownloader(states=target_states)

    # download a specific state's data from 2020-11-03
    NYTElectionDataDownloader(states='florida')
    """

    # One dict per entry of us.states.STATES; the 'name' field feeds the default state list.
    us_states = Collection([state.__dict__ for state in us.states.STATES])

    # Seconds to wait for the server before giving up on a single request.
    request_timeout = 30

    def __init__(self, page="race-page", position="president", date="2020-11-03", states=None, save=True,
                 rate_limit=1):
        """
        :param page: str: The NYT target page
        :param position: str: The NYT position designation
        :param date: str: The date data was transmitted to NYT
        :param states: str or iterable of str: The required US states, None captures all states
        :param save: bool: Flag to save data to local filesystem, default True.
        :param rate_limit: int: Seconds to sleep before each request; default 1 second.
        """
        self.base_url = "https://static01.nyt.com/elections-assets/2020/data/api"
        self.page = page
        self.position = position
        self.date = date
        self.rate_limit = rate_limit

        if states is None:
            states = self.us_states.pluck('name')
        elif isinstance(states, str):
            states = [states]
        # Normalise every name the same way, so 'New York' and 'new-york' both
        # produce the same url segment.
        self.states = [str(state).strip().lower().replace(' ', '-') for state in states]

        self.results = self.get_data()
        if save:
            self.save_locally(self.results)

    def urlify(self, date, page, state, position):
        """ Generates a NYT url for one state.
        :param date: str: The date data was transmitted to NYT
        :param page: str: The NYT target page
        :param state: str: The state's url segment, e.g. 'new-york'
        :param position: str: The NYT position designation
        :return: str: A formatted NYT url
        """
        return f"{self.base_url}/{date}/{page}/{state}/{position}.json"

    def query(self, url):
        """ Queries a url, respecting the data provider with a rate limit.
        :param url: str: A properly formatted NYT url
        :return: The decoded JSON body of the response
        :raises requests.HTTPError: when the server answers with a 4xx/5xx status
        :raises requests.Timeout: when no answer arrives within ``request_timeout`` seconds
        """
        # limit request rate
        sleep(self.rate_limit)

        print(f"Requesting url: {url}")
        response = requests.get(url, timeout=self.request_timeout)
        # Fail with the HTTP status instead of an opaque JSON decode error
        # when the server returns an error page.
        response.raise_for_status()
        return response.json()

    def get_data(self):
        """ Generates one url per entry of ``self.states`` and downloads each in turn.
        :return: list: The decoded JSON documents, in the order of ``self.states``
        """
        target_urls = [self.urlify(self.date, self.page, state, self.position) for state in self.states]

        print("Requesting data for all US States, this may take up to 60 seconds to download.")
        results = [self.query(target_url) for target_url in target_urls]
        print("Data acquired.")
        return results

    def save_json(self, data, filename):
        """ Saves one JSON document to ``<filename>.json``.
        :param data: Any JSON-serialisable object
        :param filename: str or Path: Target path WITHOUT the '.json' suffix
        :return: None
        """
        with open(f'{filename}.json', 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file)
        return None

    def save_locally(self, data):
        """ Saves each result in ``data`` as its own JSON file in the 'data' directory next to this file.
        :param data: iterable: Decoded NYT documents; each must provide data -> races[0] -> race_slug,
                     which becomes the file name
        :return: None
        """
        save_dir = Path(__file__).resolve().parent / 'data'
        save_dir.mkdir(parents=True, exist_ok=True)

        for result in data:
            race_slug = result['data']['races'][0]['race_slug']
            self.save_json(result, save_dir / race_slug)
        return None
class NYTEAnalyzer:
    """ Loads NYT Election Data for analysis.

    Instantiating the class runs ``find_fraud``, which prints two reports for every '*.json'
    file found in the 'data' directory next to this file.

    Per-candidate vote counts are not read from the files directly; they are derived as
    ``votes * vote_shares[candidate]`` for each timeseries entry.
    NOTE(review): if the shares in the feed are rounded (presumably - confirm), small apparent
    drops in a derived count can be rounding artifacts rather than real changes.
    """

    us_states = Collection([state.__dict__ for state in us.states.STATES])
    data_dir = Path(__file__).resolve().parent / 'data'

    # A drop in a derived count is only reported when it exceeds
    # loss_share_tolerance * votes + loss_vote_floor.
    # NOTE(review): 0.00049999 is presumably half of a 0.001 share resolution - confirm.
    loss_share_tolerance = 0.00049999
    loss_vote_floor = 50

    # lost_votes only looks at steps where both shares moved by more than this.
    share_change_tolerance = 0.001

    def __init__(self, states=None, position="president", category="general"):
        """
        :param states: str or list of str: Stored on the instance; not used to filter files
        :param position: str: Stored on the instance
        :param category: str: Stored on the instance
        """
        self.position = position
        self.category = category
        self.files = None
        self.states = [states] if isinstance(states, str) else states
        self.states = [state.lower().replace(' ', '-') for state in self.us_states.pluck('abbr')] \
            if states is None else self.states

        # find_fraud also runs lost_votes for every file it loads, so no
        # separate lost_votes call is needed here.
        self.find_fraud()

    @staticmethod
    def find_all_files():
        """ Determines what json files exist in the data directory.
        :return: list of Path: The '*.json' files, sorted by path
        """
        return sorted(path for path in NYTEAnalyzer.data_dir.glob('*.json') if path.is_file())

    def get_files(self):
        """ Determines which json files are in the 'data' directory.
        :return: list of Path: The '*.json' files, sorted by path
        """
        return sorted(path for path in self.data_dir.glob('*.json') if path.is_file())

    @staticmethod
    def _record_loss(label, index, lost, loser_change, first_change, second_change):
        """ Prints one loss event and splits the lost votes between the two rivals.

        The first rival is credited first, up to its own gain; whatever is left is offered to
        the second rival in the same way.

        :param label: str: Text printed inside the opening/closing markers, e.g. 'TRUMP'
        :param index: int: Index of the current timeseries entry
        :param lost: number: Votes the candidate lost in this step (then - now)
        :param loser_change: number: The candidate's change in this step (now - then)
        :param first_change: number: First rival's change in this step (now - then)
        :param second_change: number: Second rival's change in this step (now - then)
        :return: tuple: (credited to first rival, credited to second rival, total credited)
        """
        print(f"({label}")
        print(f"Index : {index} Past Index : {index - 1}")
        print(loser_change)

        remaining = lost
        to_first = 0
        to_second = 0

        if first_change > 0 and loser_change <= first_change:
            if first_change > remaining:
                to_first = remaining
                remaining = 0
            else:
                to_first = first_change
                remaining -= first_change

        if second_change > 0 and loser_change <= second_change:
            if second_change > remaining:
                to_second = remaining
                remaining = 0
            else:
                to_second = second_change
                remaining -= second_change

        lost_now = 0 if lost < 0 else lost
        print(f"{label})")
        return to_first, to_second, lost_now - remaining

    def _report_shifts(self, series):
        """ Prints, for one timeseries, every step where a derived count dropped and the totals.
        :param series: list: Entries providing 'votes' and 'vote_shares' ('bidenj', 'trumpd')
        :return: None
        """
        trump_lost = biden_lost = third_lost = 0
        trump_to_biden = trump_to_third = 0
        biden_to_trump = biden_to_third = 0
        third_to_biden = third_to_trump = 0

        # Start at 1: every check compares an entry with the one before it.
        for i in range(1, len(series)):
            now, then = series[i], series[i - 1]
            now_shares, then_shares = now["vote_shares"], then["vote_shares"]

            # Everything that is neither Biden nor Trump is treated as third party.
            third_now = now["votes"] * (1 - now_shares["bidenj"] - now_shares["trumpd"])
            third_then = then["votes"] * (1 - then_shares["bidenj"] - then_shares["trumpd"])
            trump_now = now["votes"] * now_shares["trumpd"]
            trump_then = then["votes"] * then_shares["trumpd"]
            biden_now = now["votes"] * now_shares["bidenj"]
            biden_then = then["votes"] * then_shares["bidenj"]

            trump_change = trump_now - trump_then
            biden_change = biden_now - biden_then
            third_change = third_now - third_then
            threshold = (self.loss_share_tolerance * now["votes"]) + self.loss_vote_floor

            if (trump_now < trump_then and (trump_then - trump_now) > threshold
                    and (biden_now > biden_then or third_now > third_then)
                    and (trump_change <= biden_change or trump_change <= third_change)):
                to_biden, to_third, credited = self._record_loss(
                    "TRUMP", i, trump_then - trump_now, trump_change, biden_change, third_change)
                trump_to_biden += to_biden
                trump_to_third += to_third
                trump_lost += credited

            if (biden_now < biden_then and (biden_then - biden_now) > threshold
                    and (trump_now > trump_then or third_now > third_then)
                    and (biden_change <= trump_change or biden_change <= third_change)):
                to_trump, to_third, credited = self._record_loss(
                    "BIDEN", i, biden_then - biden_now, biden_change, trump_change, third_change)
                biden_to_trump += to_trump
                biden_to_third += to_third
                biden_lost += credited

            # Unlike the two sections above, this one does not require a rival
            # to have gained; it is reported whenever a rival fared no worse.
            if (third_now < third_then and (third_then - third_now) > threshold
                    and (third_change <= trump_change or third_change <= biden_change)):
                to_biden, to_trump, credited = self._record_loss(
                    "3RD PARTY", i, third_then - third_now, third_change, biden_change, trump_change)
                third_to_biden += to_biden
                third_to_trump += to_trump
                third_lost += credited

        print(f"{trump_lost} TRUMP LOST")
        print(f"{trump_to_biden} Trump to Biden")
        print(f"{trump_to_third} Trump to Third")
        print(f"{biden_lost} BIDEN LOST")
        print(f"{biden_to_trump} Biden to Trump")
        print(f"{biden_to_third} Biden to Third")
        print(f"{third_lost} 3RD PARTY LOST")
        print(f"{third_to_biden} Third to Biden")
        print(f"{third_to_trump} Third to Trump")
        if biden_to_trump > trump_to_biden:
            print(f"{biden_to_trump - trump_to_biden} TRUMP")
        elif trump_to_biden > biden_to_trump:
            print(f"{trump_to_biden - biden_to_trump} BIDEN")

    def find_fraud(self):
        """ Prints both reports for every json file in the 'data' directory.

        For each file: the file name, the per-candidate shift report, then the ``lost_votes``
        report. The list of processed files is kept in ``self.files``.
        :return: None
        """
        # determine filepaths of json files in the 'data' directory
        self.files = self.get_files()

        for file in self.files:
            print(f"\n\n{file.name}")
            with open(file, encoding="utf8") as f:
                x = json.load(f)
            self._report_shifts(x["data"]["races"][0]["timeseries"])
            self.lost_votes(x)

    def lost_votes(self, x):
        """ Prints the steps where the overall count and both candidates' derived counts dropped.

        A step is considered only when both the Biden and the Trump share moved by more than
        ``share_change_tolerance`` compared with the previous entry. The printed values are
        'now - then' differences and are therefore negative.

        :param x: dict: A decoded NYT document providing data -> races[0] -> timeseries
        :return: None
        """
        print("\n\nLost Votes")
        total_lost = 0
        biden_lost = 0
        trump_lost = 0
        third_lost = 0
        tolerance = self.share_change_tolerance

        series = x["data"]["races"][0]["timeseries"]
        # Start at 1: every check compares an entry with the one before it.
        for i in range(1, len(series)):
            now, then = series[i], series[i - 1]
            biden_now, biden_then = now["vote_shares"]["bidenj"], then["vote_shares"]["bidenj"]
            trump_now, trump_then = now["vote_shares"]["trumpd"], then["vote_shares"]["trumpd"]
            third_now = 1 - biden_now - trump_now
            third_then = 1 - biden_then - trump_then

            biden_moved = biden_now < (biden_then - tolerance) or biden_now > (biden_then + tolerance)
            trump_moved = trump_now < (trump_then - tolerance) or trump_now > (trump_then + tolerance)
            if not (biden_moved and trump_moved):
                continue

            if (now["votes"] < then["votes"]
                    and now["votes"] * biden_now < then["votes"] * biden_then
                    and now["votes"] * trump_now < then["votes"] * trump_then):
                total_lost += now["votes"] - then["votes"]
                print(f"Index : {i} Past Index : {i - 1}")
                print(now["votes"] - then["votes"])
                trump_lost += now["votes"] * trump_now - then["votes"] * trump_then
                biden_lost += now["votes"] * biden_now - then["votes"] * biden_then
                third_lost += now["votes"] * third_now - then["votes"] * third_then

        print(f"{trump_lost} TRUMP")
        print(f"{biden_lost} BIDEN")
        print(f"{third_lost} THIRD")
        print(total_lost)
if __name__ == "__main__":
    # Run the two stages in order: the downloader's constructor fetches the
    # data (and saves it by default), then the analyzer's constructor reports
    # on the JSON files found in the local 'data' directory.
    for stage in (NYTElectionDataDownloader, NYTEAnalyzer):
        stage()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment