Last active
August 29, 2015 14:07
-
-
Save ghaering/46dd6362172f526d8132 to your computer and use it in GitHub Desktop.
Make sure a cache directory does not overflow the entire filesystem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keeps a cache directory from growing over a given size limit.
#
# There are two modes to manage the cache folder:
# 1) absolute size limit of the cache folder (--size flag);
# 2) keep a percentage of free space on the filesystem (--percent_free flag)
import asyncore | |
from collections import OrderedDict | |
import os | |
import sys | |
import threading | |
import time | |
import pyinotify | |
import click | |
class LastUpdatedOrderedDict(OrderedDict):
    """Ordered dict whose iteration order follows the most recent assignment.

    Assigning to a key that is already present moves that key to the end,
    so the first key is always the one that was set longest ago.
    """

    def __setitem__(self, key, value):
        # Drop any existing entry first so the re-insertion lands at the end.
        try:
            del self[key]
        except KeyError:
            pass
        super(LastUpdatedOrderedDict, self).__setitem__(key, value)
class CacheManagerMaxSize(object):
    """Keep a cache folder within an absolute size limit.

    Files are tracked in the order in which they were last passed to
    ``add``. Whenever the tracked total exceeds ``max_size``, the entries
    that were added longest ago are deleted first.
    """

    def __init__(self, max_size):
        """Create a manager allowing at most *max_size* (same unit as the
        sizes later passed to ``add``) of tracked files."""
        self.max_size = max_size
        # Maps file name -> file size, oldest entry first.
        self.cache = LastUpdatedOrderedDict()
        # Sum of the sizes of all files currently tracked in ``cache``.
        self.total_size = 0

    def delete(self, entry):
        """Delete the file *entry* and stop tracking it.

        Either step may find the file already gone (for example because a
        different process deleted it); both cases are silently ignored.
        """
        try:
            os.unlink(entry)
        except OSError:
            # Best effort: the file may already have been removed.
            pass
        # Unknown entries contribute nothing to the total.
        self.total_size -= self.cache.pop(entry, 0)

    def add(self, entry, size):
        """Record that file *entry* currently has *size*, then evict the
        oldest tracked files until the total fits within ``max_size``."""
        # Replace a previously recorded size instead of counting it twice.
        self.total_size += size - self.cache.get(entry, 0)
        self.cache[entry] = size

        if self.total_size <= self.max_size:
            return

        # Iterate over a snapshot of the keys: delete() mutates the dict,
        # which must not happen while iterating over the dict itself.
        for cache_key in list(self.cache):
            self.delete(cache_key)
            # Stop as soon as the limit is met (being exactly at the limit
            # is fine, matching the "exceeded" check above).
            if self.total_size <= self.max_size:
                break
class CacheManagerPercentFree(object):
    """Keep a minimum share of the file system free.

    Keeps as many files in the cache folder as possible while the file
    system that ``folder`` is on retains more than ``percent_free`` (a
    fraction, compared against ``f_bavail / f_blocks``) of free space.
    Files that were passed to ``add`` longest ago are deleted first.
    """

    def __init__(self, percent_free, folder):
        """Remember the required free fraction and the folder to measure."""
        self.percent_free = percent_free
        self.folder = folder
        # Maps file name -> file size, oldest entry first.
        self.cache = LastUpdatedOrderedDict()

    def delete(self, entry):
        """Delete the file *entry* and stop tracking it.

        Either step may find the file already gone (for example because a
        different process deleted it); both cases are silently ignored.
        """
        try:
            os.unlink(entry)
        except OSError:
            # Best effort: the file may already have been removed.
            pass
        self.cache.pop(entry, None)

    def _enough_free_space(self):
        """Return True when the file system has the required free share."""
        stat = os.statvfs(self.folder)
        if not stat.f_blocks:
            # A file system reporting zero blocks has no meaningful ratio;
            # do not evict anything rather than divide by zero.
            return True
        return float(stat.f_bavail) / stat.f_blocks > self.percent_free

    def add(self, entry, size):
        """Record file *entry* with *size*, then delete the oldest tracked
        files until the file system has the required free percentage."""
        self.cache[entry] = size
        # Iterate over a snapshot of the keys: delete() mutates the dict,
        # which must not happen while iterating over the dict itself.
        for cache_key in list(self.cache):
            # Re-check after every deletion, since each one frees space.
            if self._enough_free_space():
                break
            self.delete(cache_key)
class EventHandler(pyinotify.ProcessEvent):
    """Feed inotify events for the watched folder into a cache manager.

    The manager is a ``CacheManagerMaxSize`` when *max_size* is given and a
    ``CacheManagerPercentFree`` (built from *percent_free* and *folder*)
    otherwise.
    """

    def __init__(self, max_size=None, percent_free=None, folder=None):
        # Run the base-class initialisation that this override would
        # otherwise skip (explicit call: works for old-style classes too).
        pyinotify.ProcessEvent.__init__(self)
        if max_size is not None:
            self.cache_manager = CacheManagerMaxSize(max_size)
        else:
            self.cache_manager = CacheManagerPercentFree(percent_free, folder)

    def process_IN_DELETE(self, event):
        """Stop tracking a path that was deleted inside the watched tree."""
        self.cache_manager.delete(event.pathname)

    def process_IN_CLOSE_NOWRITE(self, event):
        """A path opened without write access was closed: refresh its entry."""
        self.update_cache(event.pathname)

    def process_IN_CLOSE_WRITE(self, event):
        """A path opened for writing was closed: refresh its entry."""
        self.update_cache(event.pathname)

    def update_cache(self, path):
        """Register the current size of *path* with the cache manager.

        Anything that is not a regular file is ignored, as is a file that
        disappears before its size can be read.
        """
        if not os.path.isfile(path):
            return
        try:
            size = os.stat(path).st_size
        except OSError:
            # The file vanished between the isfile() check and the stat().
            return
        self.cache_manager.add(path, size)
def quit_after(seconds):
    """Sleep for *seconds*, then terminate the whole process with status 0.

    Intended to run in a helper thread; ``os._exit`` ends the process
    immediately from any thread.
    """
    time.sleep(seconds)
    # os._exit() skips interpreter cleanup, including flushing of stdio
    # buffers, so write and flush the message explicitly or it may be lost.
    sys.stdout.write("Exiting due to --exit_after flag\n")
    sys.stdout.flush()
    os._exit(0)
# Decimal multipliers accepted as a trailing suffix of the --size option.
_SIZE_FACTORS = {"K": 10 ** 3, "M": 10 ** 6, "G": 10 ** 9, "T": 10 ** 12}


def _parse_size(text):
    """Convert a size such as ``500M``, ``3G`` or ``1024`` to a byte count.

    Raises ValueError when *text* is not an integer with an optional
    K/M/G/T suffix (case-insensitive, powers of 1000).
    """
    text = text.strip().upper()
    factor = _SIZE_FACTORS.get(text[-1:])
    if factor is None:
        return int(text)
    return int(text[:-1]) * factor


@click.command()
@click.option("--folder", required=True, help="cache folder to watch")
@click.option("--size", help="max size to keep folder (e. g. 500M, 3G)")
@click.option("--percent_free", help="how much space to keep in <folder>'s filesystem")
@click.option("--exit_after", help="exit process after n seconds (and be restarted by upstart, etc.)")
def main(folder, size, percent_free, exit_after):
    """Watch a cache folder and delete its oldest files to respect a limit.

    Give either --size (absolute limit) or --percent_free (share of the
    file system to keep free); --size wins when both are given.
    """
    # Validate all options up front so that bad input yields a usage error
    # instead of a traceback.
    if size:
        try:
            max_size = _parse_size(size)
        except ValueError:
            raise click.UsageError("invalid --size value: %r" % (size,))
        event_handler = EventHandler(max_size=max_size)
    elif percent_free is not None:
        try:
            free_fraction = float(percent_free) / 100.0
        except ValueError:
            raise click.UsageError(
                "invalid --percent_free value: %r" % (percent_free,))
        event_handler = EventHandler(percent_free=free_fraction, folder=folder)
    else:
        raise click.UsageError("one of --size or --percent_free is required")

    exit_delay = None
    if exit_after:
        try:
            exit_delay = int(exit_after)
        except ValueError:
            raise click.UsageError(
                "invalid --exit_after value: %r" % (exit_after,))

    watch_manager = pyinotify.WatchManager()

    # Seed the cache with the files that already exist; update_cache()
    # itself skips anything that is not a regular file.
    for dir_path, _subdirs, file_names in os.walk(folder):
        for file_name in file_names:
            event_handler.update_cache(os.path.join(dir_path, file_name))

    notifier = pyinotify.AsyncNotifier(watch_manager, event_handler)
    mask = pyinotify.IN_DELETE | pyinotify.IN_CLOSE_NOWRITE | pyinotify.IN_CLOSE_WRITE
    watch_manager.add_watch(folder, mask, rec=True, auto_add=True)

    if exit_delay is not None:
        thread = threading.Thread(target=quit_after, args=(exit_delay,))
        thread.start()

    # HACK: make it certain that we exit on KeyboardInterrupt etc.
    sys.exit = os._exit

    # AsyncNotifier is driven by the asyncore loop.
    asyncore.loop()
# Script entry point: main() is a click command, so it is called without
# arguments.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment