Skip to content

Instantly share code, notes, and snippets.

@davidmintz
Last active December 28, 2018 16:41
Show Gist options
  • Save davidmintz/4eeb54f86a55bb351aa05c5f9db5bf06 to your computer and use it in GitHub Desktop.
Save davidmintz/4eeb54f86a55bb351aa05c5f9db5bf06 to your computer and use it in GitHub Desktop.
Python 2.x script for downloading, archiving and removing emails from an IMAP folder
"""
Download, archive and delete old messages from a folder on an IMAP account.
Messages dated before 1 January of the cutoff year are appended to per-year
mbox files and then removed from the server. Kind of crude, in that you have
to hard-code values that could be taken as command-line options, and who
knows what else is wrong with this as my first Python effort of any consequence.
"""
# to do: make python3 compatible? process command line options instead of hard-coding
import imaplib, email, mailbox, re, os.path, logging, sys, time
# `log` is an alias for the logging module itself, so the rest of the script
# can call log.info()/log.warning() directly. Output goes to mail-archive.log
# in the current working directory.
log = logging
log.basicConfig(
    filename="mail-archive.log",
    format='%(asctime)s %(levelname)s: %(message)s',
    datefmt="%Y-%m-%d %I:%M:%S %p",
    level=logging.DEBUG,
)

# cutoff year. anything older than 01-Jan of 'until_year' gets downloaded and deleted.
# Kept as a string because it is compared against the year text parsed from
# each message's Date header.
until_year = "2017"

# IMAP connection parameters
hostname = 'your_imap_host'
user = 'your_username'
password = 'your_password'

# name of folder to prune
folder_name = 'folder-to-prune'

# path to mbox files (one file per year is created inside this directory)
mbox_path = "/path/to/your/email/archive"

# regexp for parsing the four-digit year from an email Date header.
# Accepts 1900-2099: the script's whole purpose is archiving *old* mail, so
# restricting the match to 2010-2019 made older messages unparseable.
pattern = re.compile(r'\b(?:19|20)\d{2}\b')

log.info('starting. cutoff is ' + until_year)

# NOTE(review): imaplib.IMAP4 connects without TLS, so the login below is sent
# in clear text. imaplib.IMAP4_SSL is the encrypted variant (it uses a
# different default port) -- switch if the server supports it.
connection = imaplib.IMAP4(hostname)
connection.login(user, password)

# select() answers with a status string plus a one-element list holding the
# message count; bail out early instead of failing later on search().
status, data = connection.select(folder_name)
if status != 'OK':
    log.error("could not select folder {!r}: {}".format(folder_name, data))
    connection.logout()
    sys.exit(1)
num_messages = data[0]
log.info("{} messages in the damn box".format(num_messages))

status, message_numbers_string = connection.search(None, 'ALL')
if status != 'OK':
    log.error("search failed: {}".format(message_numbers_string))
    connection.logout()
    sys.exit(1)
# The first element is a space-separated list of message sequence numbers;
# imaplib may hand back None when the server sent no SEARCH data at all.
nums = (message_numbers_string[0] or b'').split()

# State shared with the archiving loop: the currently open mbox (if any) and
# the number of messages flagged for deletion so far.
mbox = None
deleted = 0
# http://stackoverflow.com/questions/29432981/saving-imap-messages-with-python-mailbox-module
#
# Main pass: fetch every message number in `nums`, and for each message whose
# Date header year is before `until_year`, append the raw message to the
# per-year mbox file under `mbox_path` and flag it \Deleted on the server.
# A message that cannot be fetched or dated is left on the server untouched.

# imaplib returns bytes on Python 3 and str on Python 2; pick the parser that
# matches (message_from_bytes does not exist on Python 2).
parse_message = getattr(email, 'message_from_bytes', email.message_from_string)

# Year of the mbox currently held open in `mbox`. Tracked explicitly so the
# check does not depend on the archive path happening not to contain the year.
mbox_year = None

try:
    for n in nums:
        response = None
        msg = None
        fetch_error = None
        try:
            response, msg = connection.fetch(n, '(RFC822)')
        except Exception as e:
            # Keep a reference: Python 3 unbinds `e` when the handler ends.
            fetch_error = e
            log.warning("caught exception fetching message {}, will try again".format(n))
            for i in range(1, 6):
                # try again
                log.info("sleeping for a few seconds")
                print("exception caught, trying again...")
                time.sleep(5)
                try:
                    response, msg = connection.fetch(n, '(RFC822)')
                except Exception as retry_error:
                    fetch_error = retry_error
                    log.warning("shit failed again at retry #{}".format(i))
                    continue
                if response == "OK":
                    log.info("retry #{} worked, moving on".format(i))
                    break

        if response != "OK":
            # Never fall through with a missing or stale `msg`: that would
            # archive the wrong body and delete this message unarchived.
            if fetch_error is not None:
                reason = fetch_error
            else:
                reason = "server answered {}".format(response)
            log.error("giving up on message {}: {}".format(n, reason))
            print("shit is not working: {}".format(reason))
            print("reluctantly moving on")
            continue

        # "Returned data are tuples of message part envelope and data"
        # Non-tuple entries (e.g. the closing parenthesis) carry no body.
        raw_message = next(
            (part[1] for part in msg if isinstance(part, tuple)), None)
        if raw_message is None:
            log.warning("no message body returned for {}, skipping".format(n))
            continue

        msg_obj = parse_message(raw_message)
        date_header = msg_obj['date']
        match = None
        if date_header is not None:
            match = pattern.search(str(date_header))
        if match is None:
            # Without a year we cannot tell whether it is old enough.
            log.warning("no usable year in Date header {!r} of message {}, "
                        "leaving it on the server".format(date_header, n))
            continue
        year = match.group()

        sys.stdout.write('year: {}, processing: {} of {}\r'.format(year, n, num_messages))
        sys.stdout.flush()

        # Both sides are four-digit strings, so string comparison orders them
        # the same way the numbers would.
        if year >= until_year:
            continue

        path = os.path.join(mbox_path, year)
        if not os.path.isfile(path):
            log.info("creating mbox: " + year)
            with open(path, 'a'):
                pass

        # Open the right year's mbox, closing the previous one if the year
        # changed since the last archived message.
        if mbox is None or mbox_year != year:
            if mbox is not None:
                mbox.close()
            mbox = mailbox.mbox(path)
            mbox_year = year

        mbox.add(raw_message)
        status, _ = connection.store(n, '+FLAGS', '\\Deleted')
        if status == 'OK':
            deleted += 1
        else:
            log.warning("could not flag message {} as deleted: {}".format(n, status))
finally:
    # Flush and close the last mbox even if the loop died half-way, so that
    # everything already flagged for deletion is really on disk.
    if mbox is not None:
        mbox.close()

log.info("deleted {} messages".format(deleted))
# Permanently remove the messages flagged \Deleted, then close the selected
# folder. logout() sits in `finally` so the session is ended even when
# expunge() or close() raises.
try:
    log.info("expunging messages")
    connection.expunge()
    log.info("closing connection, logging out")
    connection.close()
finally:
    connection.logout()
log.info("done")
# sys.exit rather than the bare exit() helper, which is injected by the
# `site` module and is not guaranteed to exist.
sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment