Skip to content

Instantly share code, notes, and snippets.

@MattPitlyk
Last active July 16, 2016 22:44
Show Gist options
  • Save MattPitlyk/b6554a41b6f7cf54fb4fbbd92b30deb8 to your computer and use it in GitHub Desktop.
Save MattPitlyk/b6554a41b6f7cf54fb4fbbd92b30deb8 to your computer and use it in GitHub Desktop.
# coding: utf-8
"""
Scan all folders in path and output a list of folder names and sizes sorted by descending size.
"""
import os
from os.path import join, getsize
from collections import defaultdict
import argparse
def _matches_extension(name, wanted):
    """Return True if file *name* ends in one of the *wanted* extensions.

    *wanted* is a set of lower-case extensions without a leading dot.
    Both the text after the first dot ('tar.gz') and the final suffix
    ('gz') are tried, so compound extensions can be requested either way.
    """
    _, dot, after_first_dot = name.partition('.')
    if not dot:
        # A file without any dot has no extension; do not let a file that is
        # literally named 'txt' pass a filter for the 'txt' extension.
        return False
    after_first_dot = after_first_dot.lower()
    last_suffix = after_first_dot.rsplit('.', 1)[-1]
    return after_first_dot in wanted or last_suffix in wanted


def gather_sizes(base, extensions=None):
    """Walk the tree rooted at base and total the file sizes per folder.

    Parameters
    ----------
    base : string
        Directory to use as the root of the analysis. If None, the current
        directory is used.
    extensions : list, default None
        If not None (and not empty), only files whose extension is in this
        list are counted. Matching is case-insensitive and a leading dot
        is optional ('txt' and '.TXT' are equivalent).

    Returns
    -------
    list of (string, int)
        One ``(folder_path, size_in_mb)`` tuple per folder visited, sorted by
        descending size. Each size covers only the files directly inside that
        folder (not its subfolders), in whole megabytes (10**6 bytes, rounded
        down).
    """
    if base is None:
        # os.curdir is a string constant ('.'), not a function.
        base = os.curdir

    wanted = None
    if extensions:
        wanted = set(ext.lower().lstrip('.') for ext in extensions)

    results = {}
    for root, _dirs, files in os.walk(base):
        folder_bytes = 0
        for name in files:
            if wanted is not None and not _matches_extension(name, wanted):
                continue
            try:
                folder_bytes += getsize(join(root, name))
            except (OSError, ValueError):
                # Best effort: skip files that cannot be stat'ed (broken
                # symlinks, permission errors, files removed mid-scan,
                # undecodable or otherwise invalid names).
                continue
        # Convert once per folder; truncating every file individually would
        # make a folder full of sub-megabyte files report a size of 0.
        results[root] = folder_bytes // 1000000
    return sorted(results.items(), key=lambda t: t[1], reverse=True)
def get_folder_total_size(folder, sorted_results):
    """Return the combined size of folder and every folder beneath it.

    Parameters
    ----------
    folder : string
        Path of the folder to total, e.g. 'C:\\Users\\Matt\\Dropbox\\Share'.
        It must be spelled the same way as the paths in sorted_results.
    sorted_results : list of (string, int)
        ``(folder_path, size)`` tuples such as those returned by gather_sizes.

    Returns
    -------
    int
        Sum of the sizes of all entries that are the folder itself or live
        below it; 0 when nothing matches.
    """
    # Match on a whole path component: a bare startswith(folder) would also
    # count siblings such as 'Share2' when asked about 'Share'.
    prefix = folder if folder.endswith(os.sep) else folder + os.sep
    return sum(size for path, size in sorted_results
               if path == folder or path.startswith(prefix))
def get_folders_total_sizes(sorted_results):
    """Compute, for every listed folder, its size including subfolders.

    Parameters
    ----------
    sorted_results : list of (string, int)
        ``(folder_path, size)`` tuples such as those returned by gather_sizes,
        where each size covers only the files directly inside the folder.

    Returns
    -------
    list of (string, int)
        ``(folder_path, total_size)`` tuples sorted by descending total.
        Folders whose total is not positive are left out.
    """
    folder_sizes = {}
    for folder, _own_size in sorted_results:
        # Match on a whole path component: a bare startswith(folder) would
        # also add siblings such as 'Share2' to the total of 'Share'.
        prefix = folder if folder.endswith(os.sep) else folder + os.sep
        total = sum(size for path, size in sorted_results
                    if path == folder or path.startswith(prefix))
        if total > 0:
            folder_sizes[folder] = total
    return sorted(folder_sizes.items(), key=lambda t: t[1], reverse=True)
def size_by_type(by_type):
    """Rank file types by their collective size.

    Parameters
    ----------
    by_type : dict
        Mapping of file type (extension) to its accumulated size.

    Returns
    -------
    list of (string, int)
        ``(file_type, size)`` tuples sorted by descending size; types whose
        size is not positive are dropped.
    """
    # dict.items() exists on both Python 2 and 3; iteritems() is Python 2 only
    # and raises AttributeError on Python 3.
    non_empty = [pair for pair in by_type.items() if pair[1] > 0]
    return sorted(non_empty, key=lambda t: t[1], reverse=True)
def total_size(by_type):
    """Return the combined size of all file types in by_type.

    by_type maps a file type to its accumulated size; the result is the sum
    of those sizes (0 for an empty mapping).
    """
    combined = 0
    for type_size in by_type.values():
        combined += type_size
    return combined
if __name__ == '__main__':
    # Command-line entry point: scan a directory tree and report the size of
    # each folder, either on stdout or into a tab-separated output file.
    parser = argparse.ArgumentParser(description='List folder sizes')
    parser.add_argument('base',
                        action='store',
                        help='Base directory in which to start.')
    parser.add_argument('-s', '--subfolders',
                        action='store_true',
                        dest='include_folders',
                        help='Include totals of subfolders.')
    parser.add_argument('-m', '--minimum',
                        action='store',
                        dest='min_size',
                        type=int,
                        help='Minimum size of folders collected (MB).')
    parser.add_argument('-o', '--output',
                        action='store',
                        dest='output_filename',
                        help='Name of output file.')
    parser.add_argument('-e', '--extensions',
                        nargs='*',
                        dest='extensions',
                        help='File extensions to include (e.g. -e txt py); '
                             'all files are counted when omitted.')
    args = parser.parse_args()

    sorted_results = gather_sizes(args.base, args.extensions)
    if args.min_size is not None:
        # Without -m the option is None; comparing an int with None raises
        # TypeError on Python 3, so only filter when a minimum was given.
        sorted_results = [tup for tup in sorted_results
                          if tup[1] > args.min_size]

    sorted_folder_totals = []
    if args.include_folders:
        # Totals are computed from the (possibly filtered) per-folder list.
        sorted_folder_totals = get_folders_total_sizes(sorted_results)

    if args.output_filename is None:
        for path, size in sorted_results:
            print(path + '\t' + str(size))
        if args.include_folders:
            print('-' * 20)
            for path, size in sorted_folder_totals:
                print(path + '\t' + str(size))
    else:
        with open(args.output_filename, 'wt') as f:
            for path, size in sorted_results:
                f.write(path + '\t' + str(size) + '\n')
            if args.include_folders:
                f.write('\n' + '-' * 20 + '\n')
                for path, size in sorted_folder_totals:
                    f.write(path + '\t' + str(size) + '\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment