robertvunabandi · April 7, 2020 14:52
diff --git a/parse_raw_grape_data.py b/parse_raw_grape_data.py
 from typing import Dict, Generic, List, Tuple, TypeVar

 # Type variables for the first and second element of a Pair.
 T = TypeVar("T")
 S = TypeVar("S")  # the name argument must match the variable it is bound to

 class Pair(Generic[T, S]):
    """Annotation-only marker for a two-element pair (first: T, second: S).

    The class body is empty; within this file it appears only in type hints.
    """

 # Filenames. The first three are names we chose for the extracted csvs, so
 # the files on disk must keep these names, or the constants here must be
 # changed to whatever names are actually used.
 F_LOC = "locations.csv"       # extracted locations into its own csv
 F_ABBR = "abbreviations.csv"  # extracted abbreviations into its own csv
 F_DATA = "data.csv"           # extracted the data into its own csv
 F_OUT = "clean_data.csv"      # the final output csv

 # Column names of the output table.
 class Field:
    """Namespace holding the header name of each output column."""

    YEAR = "year"
    LOC = "location"
    ABBR = "abbreviation"
    LAT = "latitude"
    LON = "longitude"
    VAL = "value"

 # Header row of the output table, in column order.
 NEW_HEADERS = (
    Field.YEAR,
    Field.LOC,
    Field.ABBR,
    Field.LAT,
    Field.LON,
    Field.VAL,
 )

 def create_parsed() -> None:
    """Join the three input csvs into one long table and write it to F_OUT.

    The output starts with the NEW_HEADERS row, followed by one row per
    (year, location) combination holding: year, location, abbreviation,
    latitude, longitude and value.
    """
    # Load the two lookups first, then the per-year data.
    abbr_by_loc = get_abbreviations()
    coords_by_loc = get_locations()
    yearly = get_data()
    # The whole table is built before the output file is opened.
    table = [list(NEW_HEADERS)]
    table.extend(
        [year, loc, abbr_by_loc[loc], lat, lon, values[loc]]
        for year, values in yearly
        for loc, (lat, lon) in coords_by_loc.items()
    )
    with open(F_OUT, "w") as out:
        out.writelines(",".join(cells) + "\n" for cells in table)

 def get_abbreviations() -> Dict[str, str]:
    """Read F_ABBR and return Dict[Location, LocationAbbreviation].

    The first line of the file supplies the keys and the second line the
    values; cells are split on commas, stripped, and paired by position.
    """
    with open(F_ABBR) as src:
        raw_lines = src.read().splitlines()
    cells = [[part.strip() for part in raw.split(",")] for raw in raw_lines]
    names, abbrs = cells[0], cells[1]
    return dict(zip(names, abbrs))

 def get_locations() -> Dict[str, Tuple[str, str]]:
    """Read F_LOC and return Dict[Location, Tuple[Latitude, Longitude]].

    The first line is skipped (presumably a header row). Every other line
    must hold exactly three comma-separated cells: location, latitude and
    longitude. Both coordinates are parsed as floats and stored as the
    string form of that float.
    """
    with open(F_LOC) as src:
        body = src.read().splitlines()[1:]
    coords = {}
    for raw in body:
        loc, lat, lon = [part.strip() for part in raw.split(",")]
        coords[loc] = (str(float(lat)), str(float(lon)))
    return coords

 def get_data() -> List[Pair[str, Dict[str, str]]]:
    """Read F_DATA into List[Pair[Year, Dict[Location, Value]]].

    The first line is the header: its cells after the first are location
    names. In every following line the first cell is the year and the
    remaining cells are the values, matched to locations by position.

    Year and values are returned as strings: the year is normalised through
    int() and each value through try_float(), which gives "" for cells that
    do not parse as a float. Each pair is a two-element list.

    Raises:
        ValueError: if a year cell is not an integer.
        KeyError: if a row has more value cells than the header has names.
    """
    with open(F_DATA) as f:
        rows = [line.split(",") for line in f.read().splitlines()]
    header, rows = rows[0], rows[1:]
    # Position among the value columns (year column excluded) -> location.
    idx_to_location = {idx: loc.strip() for idx, loc in enumerate(header[1:])}
    return [
        [
            str(int(row[0])),
            {
                idx_to_location[idx]: try_float(value)
                for idx, value in enumerate(row[1:])
            },
        ]
        for row in rows
    ]

 def try_float(s: str) -> str:
    """Return `s` re-rendered as a float string, or "" if it is not a float.

    For example "3" becomes "3.0", while "" and "n/a" become "".
    """
    try:
        parsed = float(s)
    except ValueError:
        return ""
    return str(parsed)


 # Build the output csv only when this file is run as a script, not on import.
 if __name__ == "__main__":
    create_parsed()
	from typing import Dict, Generic, List, Tuple, TypeVar

	# Type variables for the first and second element of a Pair.
	T = TypeVar("T")
	S = TypeVar("S")  # the name argument must match the variable it is bound to

	class Pair(Generic[T, S]):
	    """Annotation-only marker for a two-element pair (first: T, second: S).

	    The class body is empty; within this file it appears only in type hints.
	    """

	# Filenames. The first three are names we chose for the extracted csvs, so
	# the files on disk must keep these names, or the constants here must be
	# changed to whatever names are actually used.
	F_LOC = "locations.csv"       # extracted locations into its own csv
	F_ABBR = "abbreviations.csv"  # extracted abbreviations into its own csv
	F_DATA = "data.csv"           # extracted the data into its own csv
	F_OUT = "clean_data.csv"      # the final output csv

	# Column names of the output table.
	class Field:
	    """Namespace holding the header name of each output column."""

	    YEAR = "year"
	    LOC = "location"
	    ABBR = "abbreviation"
	    LAT = "latitude"
	    LON = "longitude"
	    VAL = "value"

	# Header row of the output table, in column order.
	NEW_HEADERS = (Field.YEAR, Field.LOC, Field.ABBR, Field.LAT, Field.LON, Field.VAL)

	def create_parsed() -> None:
	    """Join the three input csvs into one long table and write it to F_OUT.

	    The output starts with the NEW_HEADERS row, followed by one row per
	    (year, location) combination holding: year, location, abbreviation,
	    latitude, longitude and value.
	    """
	    # first, get the abbreviations and locations
	    abbrs = get_abbreviations()
	    locs = get_locations()
	    # now, the data
	    data = get_data()
	    # now build the new table; it is complete before the file is opened
	    new_table = [list(NEW_HEADERS)]
	    for year, value_map in data:
	        for loc, (lat, lon) in locs.items():
	            new_table.append(
	                [year, loc, abbrs[loc], lat, lon, value_map[loc]]
	            )
	    # store the data in the output file
	    with open(F_OUT, "w") as f:
	        for line in new_table:
	            f.write(",".join(line) + "\n")

	def get_abbreviations() -> Dict[str, str]:
	    """Read F_ABBR and return Dict[Location, LocationAbbreviation].

	    The first line of the file supplies the keys and the second line the
	    values; cells are split on commas, stripped, and paired by position.
	    """
	    with open(F_ABBR) as f:
	        lines = f.read().splitlines()
	        rows = [[s.strip() for s in line.split(",")] for line in lines]
	        return dict(zip(rows[0], rows[1]))

	def get_locations() -> Dict[str, Tuple[str, str]]:
	    """Read F_LOC and return Dict[Location, Tuple[Latitude, Longitude]].

	    The first line is skipped (presumably a header row). Every other line
	    must hold exactly three comma-separated cells: location, latitude and
	    longitude. Both coordinates are parsed as floats and stored as the
	    string form of that float.
	    """
	    with open(F_LOC) as f:
	        lines = f.read().splitlines()
	        rows = [[s.strip() for s in line.split(",")] for line in lines][1:]
	        return {
	            loc: (str(float(lat)), str(float(lon)))
	            for loc, lat, lon in rows
	        }

	def get_data() -> List[Pair[str, Dict[str, str]]]:
	    """Read F_DATA into List[Pair[Year, Dict[Location, Value]]].

	    The first line is the header: its cells after the first are location
	    names. In every following line the first cell is the year and the
	    remaining cells are the values, matched to locations by position.

	    Year and values are returned as strings: the year is normalised through
	    int() and each value through try_float(), which gives "" for cells that
	    do not parse as a float. Each pair is a two-element list.

	    Raises:
	        ValueError: if a year cell is not an integer.
	        KeyError: if a row has more value cells than the header has names.
	    """
	    with open(F_DATA) as f:
	        lines = f.read().splitlines()
	        rows = [line.split(",") for line in lines]
	        header, rows = rows[0], rows[1:]
	        year_idx = 0
	        # Position among the value columns (year excluded) -> location.
	        idx_to_location = {idx: loc.strip() for idx, loc in enumerate(header[1:])}
	        return [
	            [
	                str(int(row[year_idx])),
	                {
	                    idx_to_location[idx]: try_float(value)
	                    for idx, value in enumerate(row[1:])
	                },
	            ]
	            for row in rows
	        ]

	def try_float(s: str) -> str:
	    """Return `s` re-rendered as a float string, or "" if it is not a float.

	    For example "3" becomes "3.0", while "" and "n/a" become "".
	    """
	    try:
	        return str(float(s))
	    except ValueError:
	        return ""


	# Build the output csv only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    create_parsed()