JettScythe · March 13, 2023 20:35
diff --git a/phone_generator.py b/phone_generator.py
 from phonenumbers import geocoder, carrier, number_type, parse, is_valid_number
 import argparse
 import json
 import csv
 from phone_gen import PhoneNumber
 from rapidfuzz import process, fuzz

 # Display labels for the integer codes that number_type() yields for a parsed number.
 type_map = {
    0: "Fixed Line",
    1: "Mobile",
    2: "Fixed Line or Mobile",
    3: "Toll Free",
    4: "Premium Rate",
    5: "Shared Cost",
    6: "VOIP",
    7: "Personal Number",
    8: "Pager",
    9: "Universal Access Number / Company Number",
    10: "Voicemail",
    99: "Unknown",
 }

 # Command line: a country code and how many numbers to generate.
 parser = argparse.ArgumentParser(
    description='Generate phone numbers for a country and save them to a CSV file.',
 )
 parser.add_argument(
    'country_code',
    type=str,
    help='The country code for the phone numbers to be generated. eg: US, CA, IN',
 )
 parser.add_argument(
    'num_of_phones',
    type=int,
    help='The amount of numbers you would like to generate',
 )
 args = parser.parse_args()

 # Lookup data: pincode_map is indexed by city name further down, and
 # pincode_district_map is iterated as records carrying "Pincode"/"Districtname".
 with open('pincodes.json') as pincode_file:
    pincode_map = json.load(pincode_file)
 with open('pincode_district_map.json') as pincode_districts_file:
    pincode_district_map = json.load(pincode_districts_file)


 def create_final_dict(exact_matches):
    """Flatten matched census rows into a single lookup dict.

    For each row the lower-cased ``Level`` is mapped to the row's ``Name``.
    When ``TRU`` is "Rural", "Urban" or "Total", the row's ``TOT_P`` is also
    stored under ``"<Name>_rural_pop"``, ``"<Name>_urban_pop"`` or
    ``"<Name>_total_pop"``. Later rows overwrite earlier ones on key clashes.
    """
    suffix_by_tru = {"Rural": "rural_pop", "Urban": "urban_pop", "Total": "total_pop"}
    flattened = {}
    for match in exact_matches:
        place_name = match["Name"]
        flattened[match["Level"].lower()] = place_name
        suffix = suffix_by_tru.get(match["TRU"])
        if suffix is not None:
            flattened[f"{place_name}_{suffix}"] = match["TOT_P"]
    return flattened


 def get_needed_maps_data(assumed_district: str, search_term: str):
    """Collect census rows for a place inside a district from ``merged.csv``.

    Scans ``merged.csv`` for ``DISTRICT`` rows named ``assumed_district`` and,
    within that district's section, gathers:

    * rows of any non-district level whose name equals ``search_term``,
    * the district's own rows,
    * ``SUB-DISTRICT`` rows that share the district's name.

    Names are compared case-insensitively. The gathered rows are flattened
    with ``create_final_dict`` in the order city, district, sub-district, so
    later groups win on key clashes. An empty dict is returned when the
    district is not present in the file.

    NOTE(review): assumes a district's sub-district/village rows directly
    follow its ``DISTRICT`` rows and run until the next district - confirm
    against the layout of ``merged.csv``.
    """
    district_key = assumed_district.lower()
    place_key = search_term.lower()
    exact_city_matches = []
    exact_subdistrict_matches = []
    exact_district_matches = []
    # newline="" is what the csv module expects for files it parses itself.
    with open("merged.csv", newline="") as maps_data:
        inside_district = False
        for row in csv.DictReader(maps_data):
            level = row["Level"]
            name = row["Name"].lower()
            if level == "DISTRICT":
                # Every DISTRICT row opens or closes the section of interest.
                # The first matching row is recorded as well, so none of the
                # district's own population rows is dropped.
                inside_district = name == district_key
                if inside_district:
                    exact_district_matches.append(row)
                continue
            if not inside_district:
                # Same-named places that belong to other districts are ignored.
                continue
            if name == place_key:
                exact_city_matches.append(row)
            elif name == district_key and level == "SUB-DISTRICT":
                exact_subdistrict_matches.append(row)
    merged_list = exact_city_matches + exact_district_matches + exact_subdistrict_matches
    return create_final_dict(merged_list)


 def generate_mobile_numbers():
    """Return a set of ``args.num_of_phones`` distinct, valid phone numbers.

    Candidates are produced for ``args.country_code`` and kept only when
    ``is_valid_number`` accepts the parsed candidate; the set removes repeats.
    """
    collected = set()
    while len(collected) < args.num_of_phones:
        candidate = PhoneNumber(args.country_code).get_number()
        if not is_valid_number(parse(candidate)):
            continue
        collected.add(candidate)
    return collected


 # Write one CSV row per generated phone number: number type, carrier, geocoded
 # city/region, pincode and - where they can be resolved - census populations.
 # newline='' hands line-ending control to the csv module (avoids blank rows on Windows).
 with open(f'phone_numbers_{args.country_code}_{args.num_of_phones}.csv',
           mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(
        ['Phone Number', 'Type', 'Parsed Carrier', 'Parsed City',
         'Parsed Region', 'Pincode', 'District From Pincode', 'Sub-District From Pincode',
         'Village From Pincode', 'Village Total Population', 'Village Rural Population', 'Village Urban Population',
         'Sub-District Total Population', 'Sub-District Rural Population', 'Sub-District Urban Population',
         'District Total Population', 'District Rural Population', 'District Urban Population']
    )
    # Index district names by pincode once instead of rescanning the list for
    # every number; as with the scan, the last record for a pincode wins.
    district_by_pincode = {
        entry["Pincode"]: entry["Districtname"].lower()
        for entry in pincode_district_map
        if "Pincode" in entry and "Districtname" in entry
    }
    for phone_num in generate_mobile_numbers():
        parsed_phone_number = parse(phone_num)
        parsed_region = geocoder.description_for_number(parsed_phone_number, "en")
        # Reset per-number state so nothing leaks over from the previous number
        # and the lookups below never touch an unbound name.
        parsed_city = ""
        pincode = ""
        mapped_data_row = {}
        if "," in parsed_region:
            # Split on the first comma only, so "City, Area, State" style
            # descriptions do not break the two-way unpacking.
            city_part, _, region_part = parsed_region.partition(",")
            parsed_city, parsed_region = city_part.strip(), region_part.strip()
            lookup_city = parsed_city
            if lookup_city not in pincode_map:
                # No exact city entry: fall back to the closest known city name.
                closest = process.extractOne(parsed_city, pincode_map.keys(), scorer=fuzz.WRatio)
                lookup_city = closest[0] if closest else None
            if lookup_city is not None:
                pincode = pincode_map[lookup_city]
                district_name = district_by_pincode.get(int(pincode))
                if district_name is not None:
                    mapped_data_row = get_needed_maps_data(district_name, lookup_city)
        parsed_carrier = carrier.name_for_number(parsed_phone_number, "en", region=args.country_code)
        district = mapped_data_row.get("district")
        subdistrict = mapped_data_row.get("sub-district")
        village = mapped_data_row.get("village")
        phone_number_type = type_map[number_type(parsed_phone_number)]
        if parsed_region == "India":
            # Country-level description only: no locality columns, fixed totals.
            output_row = [phone_num, phone_number_type, parsed_carrier, parsed_city, parsed_region, pincode,
                          "", "", "", "", "", "", "", "", "", 1416459205, 909384771, 498179071]
        else:
            output_row = [phone_num, phone_number_type, parsed_carrier, parsed_city,
                          parsed_region, pincode, district, subdistrict, village,
                          mapped_data_row.get(f"{village}_total_pop"),
                          mapped_data_row.get(f"{village}_rural_pop"),
                          mapped_data_row.get(f"{village}_urban_pop"),
                          mapped_data_row.get(f"{subdistrict}_total_pop"),
                          mapped_data_row.get(f"{subdistrict}_rural_pop"),
                          mapped_data_row.get(f"{subdistrict}_urban_pop"),
                          mapped_data_row.get(f"{district}_total_pop"),
                          mapped_data_row.get(f"{district}_rural_pop"),
                          mapped_data_row.get(f"{district}_urban_pop")]
        writer.writerow(output_row)
	from phonenumbers import geocoder, carrier, number_type, parse, is_valid_number
	import argparse
	import json
	import csv
	from phone_gen import PhoneNumber
	from rapidfuzz import process, fuzz

	# Display labels for the integer codes that number_type() yields for a parsed number.
	type_map = {
	    0: "Fixed Line",
	    1: "Mobile",
	    2: "Fixed Line or Mobile",
	    3: "Toll Free",
	    4: "Premium Rate",
	    5: "Shared Cost",
	    6: "VOIP",
	    7: "Personal Number",
	    8: "Pager",
	    9: "Universal Access Number / Company Number",
	    10: "Voicemail",
	    99: "Unknown",
	}

	# Command line: a country code and how many numbers to generate.
	parser = argparse.ArgumentParser(description='Generate phone numbers for a country and save them to a CSV file.')
	parser.add_argument('country_code', type=str,
	                    help='The country code for the phone numbers to be generated. eg: US, CA, IN')
	parser.add_argument('num_of_phones', type=int, help='The amount of numbers you would like to generate')
	args = parser.parse_args()

	# Lookup data: pincode_map is indexed by city name further down, and
	# pincode_district_map is iterated as records carrying "Pincode"/"Districtname".
	with open('pincodes.json', 'r') as pincode_file:
	    pincode_map = json.load(pincode_file)
	with open('pincode_district_map.json', 'r') as pincode_districts_file:
	    pincode_district_map = json.load(pincode_districts_file)


	def create_final_dict(exact_matches):
	    """Flatten matched census rows into a single lookup dict.

	    For each row the lower-cased ``Level`` is mapped to the row's ``Name``.
	    When ``TRU`` is "Rural", "Urban" or "Total", the row's ``TOT_P`` is also
	    stored under ``"<Name>_rural_pop"``, ``"<Name>_urban_pop"`` or
	    ``"<Name>_total_pop"``. Later rows overwrite earlier ones on key clashes.
	    """
	    final_dict = {}
	    for exact_match in exact_matches:
	        final_dict[exact_match["Level"].lower()] = exact_match["Name"]
	        if exact_match["TRU"] == "Rural":
	            final_dict[f"{exact_match['Name']}_rural_pop"] = exact_match["TOT_P"]
	        if exact_match["TRU"] == "Urban":
	            final_dict[f"{exact_match['Name']}_urban_pop"] = exact_match["TOT_P"]
	        if exact_match["TRU"] == "Total":
	            final_dict[f"{exact_match['Name']}_total_pop"] = exact_match["TOT_P"]
	    return final_dict


	def get_needed_maps_data(assumed_district: str, search_term: str):
	    """Collect census rows for a place inside a district from ``merged.csv``.

	    Scans ``merged.csv`` for ``DISTRICT`` rows named ``assumed_district`` and,
	    within that district's section, gathers:

	    * rows of any non-district level whose name equals ``search_term``,
	    * the district's own rows,
	    * ``SUB-DISTRICT`` rows that share the district's name.

	    Names are compared case-insensitively. The gathered rows are flattened
	    with ``create_final_dict`` in the order city, district, sub-district, so
	    later groups win on key clashes. An empty dict is returned when the
	    district is not present in the file.

	    NOTE(review): assumes a district's sub-district/village rows directly
	    follow its ``DISTRICT`` rows and run until the next district - confirm
	    against the layout of ``merged.csv``.
	    """
	    district_key = assumed_district.lower()
	    place_key = search_term.lower()
	    exact_city_matches = []
	    exact_subdistrict_matches = []
	    exact_district_matches = []
	    # newline="" is what the csv module expects for files it parses itself.
	    with open("merged.csv", newline="") as maps_data:
	        inside_district = False
	        for row in csv.DictReader(maps_data):
	            level = row["Level"]
	            name = row["Name"].lower()
	            if level == "DISTRICT":
	                # Every DISTRICT row opens or closes the section of interest.
	                # The first matching row is recorded as well, so none of the
	                # district's own population rows is dropped.
	                inside_district = name == district_key
	                if inside_district:
	                    exact_district_matches.append(row)
	                continue
	            if not inside_district:
	                # Same-named places that belong to other districts are ignored.
	                continue
	            if name == place_key:
	                exact_city_matches.append(row)
	            elif name == district_key and level == "SUB-DISTRICT":
	                exact_subdistrict_matches.append(row)
	    merged_list = exact_city_matches + exact_district_matches + exact_subdistrict_matches
	    return create_final_dict(merged_list)


	def generate_mobile_numbers():
	    """Return a set of ``args.num_of_phones`` distinct, valid phone numbers.

	    Candidates are produced for ``args.country_code`` and kept only when
	    ``is_valid_number`` accepts the parsed candidate; the set removes repeats.
	    """
	    unique_phone_numbers = set()
	    while len(unique_phone_numbers) < args.num_of_phones:
	        phone_number = PhoneNumber(args.country_code).get_number()
	        if is_valid_number(parse(phone_number)):
	            unique_phone_numbers.add(phone_number)
	    return unique_phone_numbers


	# Write one CSV row per generated phone number: number type, carrier, geocoded
	# city/region, pincode and - where they can be resolved - census populations.
	# newline='' hands line-ending control to the csv module (avoids blank rows on Windows).
	with open(f'phone_numbers_{args.country_code}_{args.num_of_phones}.csv',
	          mode='w', newline='') as file:
	    writer = csv.writer(file)
	    writer.writerow(
	        ['Phone Number', 'Type', 'Parsed Carrier', 'Parsed City',
	         'Parsed Region', 'Pincode', 'District From Pincode', 'Sub-District From Pincode',
	         'Village From Pincode', 'Village Total Population', 'Village Rural Population', 'Village Urban Population',
	         'Sub-District Total Population', 'Sub-District Rural Population', 'Sub-District Urban Population',
	         'District Total Population', 'District Rural Population', 'District Urban Population']
	    )
	    # Index district names by pincode once instead of rescanning the list for
	    # every number; as with the scan, the last record for a pincode wins.
	    district_by_pincode = {
	        entry["Pincode"]: entry["Districtname"].lower()
	        for entry in pincode_district_map
	        if "Pincode" in entry and "Districtname" in entry
	    }
	    for phone_num in generate_mobile_numbers():
	        parsed_phone_number = parse(phone_num)
	        parsed_region = geocoder.description_for_number(parsed_phone_number, "en")
	        # Reset per-number state so nothing leaks over from the previous number
	        # and the lookups below never touch an unbound name.
	        parsed_city = ""
	        pincode = ""
	        mapped_data_row = {}
	        if "," in parsed_region:
	            # Split on the first comma only, so "City, Area, State" style
	            # descriptions do not break the two-way unpacking.
	            city_part, _, region_part = parsed_region.partition(",")
	            parsed_city, parsed_region = city_part.strip(), region_part.strip()
	            lookup_city = parsed_city
	            if lookup_city not in pincode_map:
	                # No exact city entry: fall back to the closest known city name.
	                closest = process.extractOne(parsed_city, pincode_map.keys(), scorer=fuzz.WRatio)
	                lookup_city = closest[0] if closest else None
	            if lookup_city is not None:
	                pincode = pincode_map[lookup_city]
	                district_name = district_by_pincode.get(int(pincode))
	                if district_name is not None:
	                    mapped_data_row = get_needed_maps_data(district_name, lookup_city)
	        parsed_carrier = carrier.name_for_number(parsed_phone_number, "en", region=args.country_code)
	        district = mapped_data_row.get("district")
	        subdistrict = mapped_data_row.get("sub-district")
	        village = mapped_data_row.get("village")
	        phone_number_type = type_map[number_type(parsed_phone_number)]
	        if parsed_region == "India":
	            # Country-level description only: no locality columns, fixed totals.
	            output_row = [phone_num, phone_number_type, parsed_carrier, parsed_city, parsed_region, pincode,
	                          "", "", "", "", "", "", "", "", "", 1416459205, 909384771, 498179071]
	        else:
	            output_row = [phone_num, phone_number_type, parsed_carrier, parsed_city,
	                          parsed_region, pincode, district, subdistrict, village,
	                          mapped_data_row.get(f"{village}_total_pop"),
	                          mapped_data_row.get(f"{village}_rural_pop"),
	                          mapped_data_row.get(f"{village}_urban_pop"),
	                          mapped_data_row.get(f"{subdistrict}_total_pop"),
	                          mapped_data_row.get(f"{subdistrict}_rural_pop"),
	                          mapped_data_row.get(f"{subdistrict}_urban_pop"),
	                          mapped_data_row.get(f"{district}_total_pop"),
	                          mapped_data_row.get(f"{district}_rural_pop"),
	                          mapped_data_row.get(f"{district}_urban_pop")]
	        writer.writerow(output_row)