Skip to content

Instantly share code, notes, and snippets.

@firesofmay
Last active September 19, 2024 09:09
Show Gist options
  • Save firesofmay/0a96c847b01bd70ba04bbe829e27cc8f to your computer and use it in GitHub Desktop.
Save firesofmay/0a96c847b01bd70ba04bbe829e27cc8f to your computer and use it in GitHub Desktop.
This script processes a CSV file containing addresses, normalizes them, and adds city, state abbreviation, and full state name columns.
"""
CSV Address Normalizer
This script processes a CSV file containing addresses, normalizes them, and adds city, state abbreviation, and full state name columns.
Usage:
python main.py
Requirements:
- Python 3.6+
- Install required packages:
pip install usaddress-scourgify us
Input:
- CSV file with at least an 'address' column
Output:
- CSV file with additional columns: 'city', 'state_abbr', 'state_full'
"""
import csv
from scourgify import normalize_address_record
import us
def _lookup_city_state(address):
    """Return ``(city, state_abbr, state_full)`` for one raw address string.

    The address is passed to ``normalize_address_record``; its ``city`` and
    ``state`` entries are read from the result, and the state abbreviation is
    resolved to a full name with ``us.states.lookup``.

    Any component that cannot be determined is returned as ``''``. If the
    normalizer raises, the error is printed and all three values are ``''``.
    """
    try:
        normalized = normalize_address_record(address)
    except Exception as e:
        # Deliberate best-effort: one unparseable address must not abort the
        # whole file, so report it and fall back to blank columns.
        print(f"Error normalizing address: {address}. Error: {str(e)}")
        return '', '', ''

    # NOTE(review): presumably missing components may come back as None
    # rather than being absent; `or ''` covers both cases.
    city = normalized.get('city') or ''
    state_abbr = normalized.get('state') or ''

    # lookup() yields None for an unknown state; keep the city/abbreviation
    # we already have instead of discarding them via an AttributeError.
    state = us.states.lookup(state_abbr) if state_abbr else None
    state_full = state.name if state is not None else ''
    return city, state_abbr, state_full


def process_csv(input_file_path, output_file_path, *, encoding=None):
    """Copy a CSV file, appending city and state columns derived from its address column.

    The first row of the input is treated as the header. The address column
    is the first header equal to ``"address"`` (case-insensitive, surrounding
    whitespace ignored). Every data row is written to the output followed by
    three extra fields: ``city``, ``state_abbr`` and ``state_full``.

    Args:
        input_file_path: Path of the CSV file to read.
        output_file_path: Path of the CSV file to write. It is only created
            (or overwritten) once the input header has been validated.
        encoding: Optional text encoding used for both files; ``None`` keeps
            the platform default used by ``open``.

    Returns:
        None. Progress and problems are reported with ``print``.

    Notes:
        - If the input is empty or has no address column, a message is
          printed and no output file is written.
        - Completely empty rows (blank lines) are skipped.
        - Rows shorter than the header are padded with empty fields so the
          appended columns stay aligned with the new headers.
        - Rows whose address field is blank get empty city/state values
          without invoking the normalizer.
    """
    with open(input_file_path, 'r', newline='', encoding=encoding) as infile:
        reader = csv.reader(infile)

        headers = next(reader, None)
        if headers is None:
            print("The input CSV file is empty.")
            return

        address_index = next(
            (
                index
                for index, header in enumerate(headers)
                if header.strip().lower() == "address"
            ),
            None,
        )
        if address_index is None:
            print("Address column not found in the CSV file.")
            return

        # Open the output only after validation so that a bad input never
        # truncates an existing output file.
        with open(output_file_path, 'w', newline='', encoding=encoding) as outfile:
            writer = csv.writer(outfile)
            writer.writerow(headers + ['city', 'state_abbr', 'state_full'])

            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; nothing to copy.
                    continue
                if len(row) < len(headers):
                    # Pad short rows so indexing is safe and columns line up.
                    row = row + [''] * (len(headers) - len(row))

                address = row[address_index]
                if address.strip():
                    city, state_abbr, state_full = _lookup_city_state(address)
                else:
                    city = state_abbr = state_full = ''

                writer.writerow(row + [city, state_abbr, state_full])

    print(f"Processing complete. Output saved to {output_file_path}")
# Example usage
input_file_path = "input.csv"  # Replace with your input CSV file path
output_file_path = "output.csv"  # Replace with your desired output file path

if __name__ == "__main__":
    # Run the conversion only when executed as a script (`python main.py`),
    # so importing this module does not read or write any files.
    process_csv(input_file_path, output_file_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment