Last active
February 14, 2022 04:27
-
-
Save afnanenayet/1da89b4dd2d1517a9cbbfaace6f97732 to your computer and use it in GitHub Desktop.
Add FIPS data to a CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""A script to augment a CSV with FIPS data. | |
""" | |
import requests | |
import requests_futures | |
import pandas as pd | |
from typing import cast, Dict, Any | |
import pdb | |
import numpy as np | |
from loguru import logger | |
import json | |
from requests_futures.sessions import FuturesSession | |
from concurrent.futures import as_completed | |
# Module-level list; not referenced by any code visible in this file.
results: list = []

# Settings describing a readable path: a regular file, not a directory.
# NOTE(review): the keys look like click.Path keyword arguments, but nothing
# visible here consumes this dict -- confirm before relying on it.
READABLE_FILE_PARAMS: Dict[str, Any] = {
    "file_okay": True,
    "dir_okay": False,
    "path_type": "Path",
}
def get_fips_request(row) -> Dict[str, Any]:
    """Build the query parameters for the FIPS lookup of a single row.

    Args:
        row: A row (anything indexable by column name) that provides the
            ``lat_tract`` and ``long_tract`` values.

    Returns:
        A dict with the row's ``latitude`` and ``longitude`` plus a
        ``format`` entry requesting a JSON response.
    """
    latitude = row["lat_tract"]
    longitude = row["long_tract"]
    return {"latitude": latitude, "longitude": longitude, "format": "json"}
def get_fips_data(row, timeout: float = 10.0) -> str:
    """Get FIPS data for a row in the dataframe.

    We constrain this operation to a function so we can map over each row in
    the DataFrame.

    Args:
        row: The row to retrieve FIPS information for. It must provide
            ``lat_tract`` and ``long_tract`` via ``row[...]``.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        The county FIPS value from the API response, or the string
        ``"error"`` if the request fails, the response status is not OK, or
        the body does not have the expected JSON shape.
    """
    url = "https://geo.fcc.gov/api/census/block/find"
    lat = row["lat_tract"]
    lon = row["long_tract"]
    payload = {
        "latitude": lat,
        "longitude": lon,
        "format": "json",
    }
    try:
        response = requests.get(url, params=payload, timeout=timeout)
        # Check the status before parsing: an error body need not be JSON.
        if not response.ok:
            return "error"
        return response.json()["County"]["FIPS"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # RequestException: network/timeout failure; ValueError: body is not
        # JSON; KeyError/TypeError: JSON without the expected County/FIPS.
        logger.error(
            f"Got an error trying to get FIPS code for lat: {lat} lon: {lon}"
        )
        return "error"
def _fips_from_response(resp) -> str:
    """Return the county FIPS value from one API response, or ``"error"``."""
    if resp is None or not resp.ok:
        logger.error("Response not OK")
        return "error"
    try:
        return resp.json()["County"]["FIPS"]
    except (ValueError, KeyError, TypeError) as e:
        # ValueError: body is not JSON; KeyError/TypeError: unexpected shape.
        logger.error(f"Error decoding response: {e}")
        return "error"


def augment_fips(
    input_fname: str = "urbanization-census-tract.csv",
    output_fname: str = "urbanization-census-tract-updated.csv",
    timeout: float = 30.0,
) -> None:
    """Augment a CSV with FIPS information.

    Reads ``input_fname``, looks up the county FIPS value for every row's
    ``lat_tract``/``long_tract`` pair through the FCC census block API, and
    writes the frame with an added ``FIPS`` column to ``output_fname``. Rows
    whose lookup fails get the string ``"error"`` instead of aborting the run.

    Args:
        input_fname: Path of the CSV to read. It must contain ``lat_tract``
            and ``long_tract`` columns convertible to float.
        output_fname: Path the augmented CSV is written to.
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the whole job.
    """
    df = cast(pd.DataFrame, pd.read_csv(input_fname))
    df["lat_tract"] = df["lat_tract"].astype(float)
    df["long_tract"] = df["long_tract"].astype(float)
    logger.info("Loaded DataFrame")

    url = "https://geo.fcc.gov/api/census/block/find"
    # Build the payloads row by row. ``df.apply(..., axis=1)`` returns a
    # DataFrame for an empty input, and iterating that yields column names
    # rather than payload dicts.
    payloads = [
        {
            "latitude": row["lat_tract"],
            "longitude": row["long_tract"],
            "format": "json",
        }
        for _, row in df.iterrows()
    ]

    # The context manager closes the session once every request has finished.
    with FuturesSession() as session:
        futures = [
            session.get(url, params=payload, timeout=timeout)
            for payload in payloads
        ]
        logger.info("Generated request queue")

        # Collect in submission order so responses line up with the rows.
        all_resps = []
        for future in futures:
            try:
                all_resps.append(future.result())
            except requests.RequestException as e:
                # A failed request must not abort the remaining rows; it is
                # recorded as a missing response and reported as "error".
                logger.error(f"Request failed: {e}")
                all_resps.append(None)

    fips_codes = [_fips_from_response(resp) for resp in all_resps]
    df["FIPS"] = np.array(fips_codes)
    df.to_csv(output_fname)
if __name__ == "__main__":
    # Run the augmentation only when executed as a script, not on import.
    augment_fips()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment