cesschneider · September 11, 2020 10:30
diff --git a/gistfile1.txt b/gistfile1.txt
 # Improvements for production environment:
 # 1. Optimize filtering of undesirable chars using regex
 # 2. Distribute filter/counting tasks between multiple threads
 # 3. Create unit test cases with multiple text contents

 import re

 # Punctuation (and the trailing newline) stripped from every line before
 # it is split into words.
 remove_chars = ['\n', '-', '.', ',', '"', '(', ')', '[', ']', ':', ';']

 # Compiled once so the per-line loop does not repeat the pattern lookup.
 _NUMBER_RE = re.compile(r"\b\d+\b")
 # A single translation table deletes every unwanted character in one pass
 # instead of one str.replace() call per character.
 _DELETE_TABLE = str.maketrans('', '', ''.join(remove_chars))


 def count_words(lines):
     """Count case-insensitive word frequencies over an iterable of lines.

     Stand-alone numbers are removed first, then every character listed in
     ``remove_chars``; what remains is split on any run of whitespace (so
     tab-separated words are counted separately) and lower-cased.

     Args:
         lines: Iterable of text lines, e.g. an open text file.

     Returns:
         A list of ``(word, count)`` tuples sorted by descending count.
         Words with equal counts keep the order in which they were first
         seen.
     """
     # Imported locally so this block does not depend on a file-level import.
     from collections import Counter

     counts = Counter()
     for line in lines:
         # Numbers must go before punctuation: "3.14" only becomes two
         # stand-alone numbers while the "." is still there as a boundary.
         cleaned = _NUMBER_RE.sub("", line).translate(_DELETE_TABLE)
         counts.update(word.lower() for word in cleaned.split())
     return counts.most_common()


 def main(input_path='data.txt', output_path='results.txt'):
     """Read ``input_path``, then print and save its word frequencies.

     Each result is emitted as ``word (count)`` on its own line, both to
     standard output and to ``output_path``.

     Args:
         input_path: Text file to analyse.
         output_path: File that receives the report (overwritten).
     """
     # Context managers close both files even if processing raises.
     with open(input_path, 'r') as source:
         ranked = count_words(source)

     with open(output_path, 'w') as output:
         for word, count in ranked:
             result = "{} ({})\n".format(word, count)
             print(result, end='')
             output.write(result)


 if __name__ == "__main__":
     main()
	# Improvements for production environment:
	# 1. Optimize filtering of undesirable chars using regex
	# 2. Distribute filter/counting tasks between multiple threads
	# 3. Create unit test cases with multiple text contents

	import re

	# Punctuation (and the trailing newline) stripped from every line before
	# it is split into words.
	remove_chars = ['\n', '-', '.', ',', '"', '(', ')', '[', ']', ':', ';']

	# Compiled once so the per-line loop does not repeat the pattern lookup.
	_NUMBER_RE = re.compile(r"\b\d+\b")
	# A single translation table deletes every unwanted character in one pass
	# instead of one str.replace() call per character.
	_DELETE_TABLE = str.maketrans('', '', ''.join(remove_chars))


	def count_words(lines):
	    """Count case-insensitive word frequencies over an iterable of lines.

	    Stand-alone numbers are removed first, then every character listed in
	    ``remove_chars``; what remains is split on any run of whitespace (so
	    tab-separated words are counted separately) and lower-cased.

	    Args:
	        lines: Iterable of text lines, e.g. an open text file.

	    Returns:
	        A list of ``(word, count)`` tuples sorted by descending count.
	        Words with equal counts keep the order in which they were first
	        seen.
	    """
	    # Imported locally so this block does not depend on a file-level import.
	    from collections import Counter

	    counts = Counter()
	    for line in lines:
	        # Numbers must go before punctuation: "3.14" only becomes two
	        # stand-alone numbers while the "." is still there as a boundary.
	        cleaned = _NUMBER_RE.sub("", line).translate(_DELETE_TABLE)
	        counts.update(word.lower() for word in cleaned.split())
	    return counts.most_common()


	def main(input_path='data.txt', output_path='results.txt'):
	    """Read ``input_path``, then print and save its word frequencies.

	    Each result is emitted as ``word (count)`` on its own line, both to
	    standard output and to ``output_path``.

	    Args:
	        input_path: Text file to analyse.
	        output_path: File that receives the report (overwritten).
	    """
	    # Context managers close both files even if processing raises.
	    with open(input_path, 'r') as source:
	        ranked = count_words(source)

	    with open(output_path, 'w') as output:
	        for word, count in ranked:
	            result = "{} ({})\n".format(word, count)
	            print(result, end='')
	            output.write(result)


	if __name__ == "__main__":
	    main()