coreyhermanson · May 10, 2017 15:04
diff --git a/normalize_company.py b/normalize_company.py
 #!/usr/bin/env python

 import pyperclip
 import re
 from list_clipboard_manipulations import list_to_clipboard

 # Cleaned terms, in processing order; filled by the main loop further down.
 good_list = []
 # How many input terms came out of clean_text() different from how they went in.
 delete_counter = 0
 # When True, the input terms are sorted alphabetically before cleaning.
 sort_alpha = False


 # Functions
 def clean_text(regex_list, word):
     """
     Remove every match of each pattern in ``regex_list`` from ``word``.

     The patterns are applied one after another, in list order, and each
     pattern works on the output of the previous one, so the order of
     ``regex_list`` can affect the result.

     :param regex_list: iterable of regular-expression patterns
     :param word: the text to clean
     :return: ``word`` with all pattern matches replaced by the empty string
     """
     for pattern in regex_list:
         word = re.compile(pattern).sub('', word)
     return word


 # Known legal-entity suffixes, each anchored to the end of the company name.
 # Raw strings are used so the "\." escapes reach the regex engine untouched
 # (in a plain string "\." is an invalid escape and warns on modern Python).
 regexes = [
     r" Ltd\.?$",
     r" Inc\.?$",
     r" Corp\.?$",
     r" Corporation$",
     r" Limited$",
     r" Incorporated$",
     r" [Pp][Ll][Cc]$",
 ]

 # All suffix patterns combined into one alternation (currently not used).
 regex_string = "(" + ")|(".join(regexes) + ")"

 # Grab text from the clipboard and convert it to a string.
 text = str(pyperclip.paste())

 # One term per line. splitlines() accepts \r\n as well as bare \n / \r, so
 # clipboard text is split correctly regardless of platform line endings.
 # Blank lines are dropped; the list is alphabetized only when requested.
 input_list = [line for line in text.splitlines() if line]
 if sort_alpha:
     input_list.sort()

 # Clean every term in both the sorted and the unsorted case (this loop must
 # not live inside the ``else`` branch), collecting results in the
 # module-level good_list and counting how many terms were actually changed.
 for term in input_list:
     new_term = clean_text(regexes, term)
     good_list.append(new_term)
     if new_term != term:
         delete_counter += 1

 # Copy the cleaned terms to the clipboard. No emptiness check is done here;
 # whether list_to_clipboard handles an empty list is not visible in this file.
 list_to_clipboard(good_list)
 print(str(delete_counter) + " terms were edited.")
	#!/usr/bin/env python

	import pyperclip
	import re
	from list_clipboard_manipulations import list_to_clipboard

	# Cleaned terms, in processing order; filled by the main loop further down.
	good_list = []
	# How many input terms came out of clean_text() different from how they went in.
	delete_counter = 0
	# When True, the input terms are sorted alphabetically before cleaning.
	sort_alpha = False


	# Functions
	def clean_text(regex_list, word):
	    """
	    Check ``word`` against a list of regexes and strip every match.

	    Each pattern is applied in list order to the result of the previous
	    substitution; every match is replaced by the empty string.

	    :param regex_list: iterable of regular-expression patterns
	    :param word: the text to clean
	    :return: the text after it has gone through every regex filter
	    """
	    new_text = word
	    for r in regex_list:
	        # Feed the running result forward so the patterns compound.
	        new_text = re.sub(r, '', new_text)
	    return new_text


	# Known legal-entity suffixes, each anchored to the end of the company name.
	# Raw strings are used so the "\." escapes reach the regex engine untouched
	# (in a plain string "\." is an invalid escape and warns on modern Python).
	regexes = [
	    r" Ltd\.?$",
	    r" Inc\.?$",
	    r" Corp\.?$",
	    r" Corporation$",
	    r" Limited$",
	    r" Incorporated$",
	    r" [Pp][Ll][Cc]$",
	]

	# All suffix patterns combined into one alternation (currently not used).
	# The separator must be a bare "|": an escaped "\|" would match a literal
	# pipe character instead of acting as regex alternation.
	regex_string = "(" + ")|(".join(regexes) + ")"

	# Grab text from the clipboard and convert it to a string.
	text = str(pyperclip.paste())

	# One term per line. splitlines() accepts \r\n as well as bare \n / \r, so
	# clipboard text is split correctly regardless of platform line endings.
	# Blank lines are dropped; the list is alphabetized only when requested.
	input_list = [line for line in text.splitlines() if line]
	if sort_alpha:
	    input_list.sort()

	# Clean every term, collecting results in the module-level good_list and
	# counting how many terms were actually changed by the cleaning.
	for term in input_list:
	    new_term = clean_text(regexes, term)
	    good_list.append(new_term)
	    if new_term != term:
	        delete_counter += 1

	# Copy the cleaned terms to the clipboard. No emptiness check is done here;
	# whether list_to_clipboard handles an empty list is not visible in this file.
	list_to_clipboard(good_list)
	print(str(delete_counter) + " terms were edited.")