Last active
July 16, 2016 22:44
-
-
Save MattPitlyk/b6554a41b6f7cf54fb4fbbd92b30deb8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8
"""
Scan all folders in path and output a list of folder names and sizes sorted by descending size.
"""
import os | |
from os.path import join, getsize | |
from collections import defaultdict | |
import argparse | |
def gather_sizes(base, extensions=None):
    """Walk the tree rooted at ``base`` and total the file sizes per folder.

    Parameters
    ----------
    base : string
        Directory to use as the root of the analysis. If None, the current
        directory is used.
    extensions : list, default None
        If not None (and not empty), only files whose extension is in this
        list are counted. Matching is case-insensitive and a leading dot is
        optional ('txt' and '.txt' are equivalent). Multi-part extensions
        such as 'tar.gz' are matched as well.

    Returns
    -------
    list of (string, int)
        ``(folder, size_in_mb)`` pairs, one per folder visited by
        ``os.walk``, sorted by descending size. A size is the number of
        whole megabytes (10**6 bytes, rounded down) taken by the files
        directly inside that folder; subfolders are reported separately.
    """
    if base is None:
        # os.curdir is a plain string constant ('.'), not a function.
        base = os.curdir

    wanted = None
    if extensions:
        wanted = set(ext.lower().lstrip('.') for ext in extensions)
        wanted.discard('')

    def is_included(name):
        # No filter requested: every file counts.
        if wanted is None:
            return True
        parts = name.lower().split('.')
        # Try every dot-suffix so 'a.tar.gz' matches both 'tar.gz' and 'gz',
        # and 'my.file.txt' matches 'txt'. Names without a dot never match.
        return any('.'.join(parts[i:]) in wanted
                   for i in range(1, len(parts)))

    results = {}
    for root, _dirs, files in os.walk(base):
        # Accumulate bytes and convert once per folder; truncating each file
        # to whole megabytes would count every file under 1 MB as zero.
        folder_bytes = 0
        for name in files:
            if not is_included(name):
                continue
            try:
                folder_bytes += getsize(join(root, name))
            except OSError:
                # Best effort: skip files that vanished, are unreadable or
                # are broken symlinks instead of aborting the whole scan.
                continue
        results[root] = folder_bytes // 1000000
    return sorted(results.items(), key=lambda t: t[1], reverse=True)
def get_folder_total_size(folder, sorted_results):
    """Return the combined size of ``folder`` and all of its subfolders.

    Parameters
    ----------
    folder : string
        Path of the folder, e.g. 'C:\\Users\\Matt\\Dropbox\\Share'. A
        trailing path separator is ignored.
    sorted_results : list of (string, int)
        ``(folder_path, size)`` pairs such as those returned by
        ``gather_sizes``.

    Returns
    -------
    int
        Sum of the sizes of every entry whose path is ``folder`` itself or
        lies below it.
    """
    separators = os.sep + (os.altsep or '')
    prefix = folder.rstrip(separators)

    def is_inside(path):
        if not path.startswith(prefix):
            return False
        remainder = path[len(prefix):]
        # A bare prefix match is not enough: '/a/foobar' starts with
        # '/a/foo' but is a sibling, so require a separator boundary.
        return remainder == '' or remainder[0] in separators

    return sum(size for path, size in sorted_results if is_inside(path))
def get_folders_total_sizes(sorted_results):
    """Compute, for every folder, its size including all of its subfolders.

    Parameters
    ----------
    sorted_results : list of (string, int)
        ``(folder_path, size)`` pairs such as those returned by
        ``gather_sizes``, where each size covers only the files directly
        inside that folder.

    Returns
    -------
    list of (string, int)
        ``(folder_path, total_size)`` pairs sorted by descending total.
        Folders whose total is zero are omitted.
    """
    separators = os.sep + (os.altsep or '')

    def is_inside(path, folder):
        prefix = folder.rstrip(separators)
        if not path.startswith(prefix):
            return False
        remainder = path[len(prefix):]
        # Require a separator boundary so that a sibling such as
        # '/a/foobar' is not counted as part of '/a/foo'.
        return remainder == '' or remainder[0] in separators

    folder_sizes = {}
    for folder, _own_size in sorted_results:
        total = sum(size for path, size in sorted_results
                    if is_inside(path, folder))
        if total > 0:
            folder_sizes[folder] = total
    return sorted(folder_sizes.items(), key=lambda t: t[1], reverse=True)
def size_by_type(by_type):
    """Sort file types by their collective size, largest first.

    Parameters
    ----------
    by_type : dict
        Mapping of file type (extension) to its accumulated size.

    Returns
    -------
    list of (string, int)
        ``(file_type, size)`` pairs with a size greater than zero, sorted by
        descending size.
    """
    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    non_empty = [tup for tup in by_type.items() if tup[1] > 0]
    return sorted(non_empty, key=lambda t: t[1], reverse=True)
def total_size(by_type):
    """Return the grand total of all per-type sizes in ``by_type``."""
    grand_total = 0
    for type_size in by_type.values():
        grand_total += type_size
    return grand_total
if __name__ == '__main__':
    # Command-line entry point: scan the base directory, optionally filter by
    # extension and minimum size, then print the report or write it to a file.
    parser = argparse.ArgumentParser(description='List folder sizes')
    parser.add_argument('base',
                        action='store',
                        help='Base directory in which to start.')
    parser.add_argument('-s', '--subfolders',
                        action='store_true',
                        dest='include_folders',
                        help='Include totals of subfolders.')
    parser.add_argument('-m', '--minimum',
                        action='store',
                        dest='min_size',
                        type=int,
                        help='Minimum size of folders collected (MB).')
    parser.add_argument('-o', '--output',
                        action='store',
                        dest='output_filename',
                        help='Name of output file.')
    parser.add_argument('-e', '--extensions',
                        nargs='*',
                        dest='extensions',
                        help='File extensions to include (e.g. txt jpg). '
                             'All files are included when omitted.')
    results = parser.parse_args()

    all_results = gather_sizes(results.base, results.extensions)

    def above_minimum(pairs):
        # Without -m there is no threshold; comparing an int against None
        # raises TypeError on Python 3, so skip the filter entirely.
        if results.min_size is None:
            return list(pairs)
        return [tup for tup in pairs if tup[1] > results.min_size]

    sorted_results = above_minimum(all_results)
    if results.include_folders:
        # Roll up from the unfiltered results so that small subfolders still
        # contribute to their parents' totals, then apply the same minimum.
        sorted_folder_totals = above_minimum(
            get_folders_total_sizes(all_results))

    if results.output_filename is None:
        for tup in sorted_results:
            print(tup[0] + '\t' + str(tup[1]))
        if results.include_folders:
            print('-' * 20)
            for tup in sorted_folder_totals:
                print(tup[0] + '\t' + str(tup[1]))
    else:
        with open(results.output_filename, 'wt') as f:
            for tup in sorted_results:
                f.write(tup[0] + '\t' + str(tup[1]) + '\n')
            if results.include_folders:
                f.write('\n' + '-' * 20 + '\n')
                for tup in sorted_folder_totals:
                    f.write(tup[0] + '\t' + str(tup[1]) + '\n')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment