Skip to content

Instantly share code, notes, and snippets.

@xdlg
Created January 9, 2020 13:17
Show Gist options
  • Save xdlg/f35a120224c0c3bb3c830cdeba5222c2 to your computer and use it in GitHub Desktop.
Save xdlg/f35a120224c0c3bb3c830cdeba5222c2 to your computer and use it in GitHub Desktop.
Simple script to count character frequencies in text files
#!/usr/bin/python3
import argparse
import string
from os.path import join
from glob import glob
from collections import Counter
def main():
    """Count character frequencies across source files and print a table.

    Command-line arguments (read from ``sys.argv`` via ``argparse``):
        directory:   root directory, scanned recursively.
        extensions:  comma-separated file extensions, e.g. ``"c,h"``.
        -l/--letters, -d/--digits, -p/--punctuation, -w/--whitespace:
            select which character classes (as defined by the ``string``
            module) appear in the report.  If none is given, nothing is
            printed.

    Output: one line per selected character that occurs at least once,
    formatted as ``repr(char) \\t count``, most frequent first.
    """
    # Local import: the file header only pulls in ``join`` from os.path.
    from os.path import isfile

    parser = argparse.ArgumentParser()
    parser.add_argument("directory", help="directory to scan")
    parser.add_argument("extensions", help="file extensions, e.g. \"c,h\"")
    parser.add_argument("-l", "--letters", help="count ASCII letters",
                        action="store_true")
    parser.add_argument("-d", "--digits", help="count digits",
                        action="store_true")
    parser.add_argument("-p", "--punctuation", help="count punctuation",
                        action="store_true")
    parser.add_argument("-w", "--whitespace", help="count whitespace",
                        action="store_true")
    args = parser.parse_args()

    # Build the list of relevant file paths.
    file_paths = []
    for extension in args.extensions.split(','):
        pattern = join(args.directory, "**/*." + extension.strip())
        file_paths.extend(glob(pattern, recursive=True))
    # Drop duplicates (e.g. extensions "c,c") so no file is counted twice,
    # keeping first-seen order, and skip directories whose name happens to
    # match the pattern -- open() would fail on them.
    file_paths = [path for path in dict.fromkeys(file_paths) if isfile(path)]

    # For each file, add the character counts to the total.
    counter = Counter()
    for file_path in file_paths:
        # The context manager closes each handle promptly.  Undecodable
        # bytes are replaced instead of aborting the whole scan; the
        # replacement character belongs to none of the reportable classes,
        # so it never shows up in the output.
        with open(file_path, 'r', errors="replace") as handle:
            counter.update(handle.read())

    chars_to_count = set(
        (string.ascii_letters if args.letters else "")
        + (string.digits if args.digits else "")
        + (string.punctuation if args.punctuation else "")
        + (string.whitespace if args.whitespace else "")
    )

    # Print results, most frequent first (ties keep first-seen order).
    for char, count in counter.most_common():
        if char in chars_to_count:
            print(f"{char!r} \t {count}")
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment