-
-
Save timprepscius/06a73a615228beee0f3c8242ad2eebc2 to your computer and use it in GitHub Desktop.
Export iOS/iMessage chat logs to HTML or text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import sys, getopt | |
import urllib | |
import urlparse | |
import base64 | |
import mimetypes | |
import cgi | |
import sqlite3 | |
import os | |
import errno | |
from os import path | |
from shutil import copy2 | |
# TODO: | |
# - Add auto-linking | |
# - Export video media | |
# - Match chat IDs up to names using Contacts.app SQLite db | |
# Default Messages database of the current user (macOS location).
CHAT_DB = path.expanduser("~/Library/Messages/chat.db")

# Directory, relative to the working directory, that receives copied
# attachments when they are kept during an HTML export.
ATTACHDIR = "export-Attachments"

# Seconds between the Unix epoch (1970-01-01) and the epoch used for
# message.date (2001-01-01): 31 years, 8 of which contain a leap day.
# cf. http://apple.stackexchange.com/questions/114168
# NOTE(review): the queries in this file add EPOCH to message.date as whole
# seconds; newer macOS releases reportedly store nanoseconds -- confirm
# against the database being exported.
EPOCH = (31 * 365 + 8) * 24 * 60 * 60
def list_chats(chatFile):
    """Print every non-archived chat with its message count and date range.

    One line is printed per chat identifier that has at least one message:
    the identifier, the number of messages, and the local dates of the
    first and last message.

    Args:
        chatFile: Path to the Messages SQLite database.
    """
    db = sqlite3.connect(chatFile)
    try:
        print("Below is a list of IDs and their associated message counts:")

        # Materialize the identifiers first so the per-chat queries below
        # do not interleave with a half-consumed result set.
        chat_ids = [row[0] for row in db.execute("""
            SELECT DISTINCT chat_identifier
            FROM chat WHERE is_archived = 0 ORDER BY chat_identifier;
        """)]

        for chat_id in chat_ids:
            # A single aggregate query yields the count and both dates.
            # The identifier is bound as a parameter: splicing it into the
            # SQL text breaks on embedded quotes and lets SQLite read a
            # double-quoted value as a column name.
            numRows, firstDate, lastDate = db.execute("""
                SELECT COUNT(*),
                       substr(datetime(MIN(m.date) + ?, 'unixepoch', 'localtime'), 0, 11),
                       substr(datetime(MAX(m.date) + ?, 'unixepoch', 'localtime'), 0, 11)
                FROM chat as c
                INNER JOIN chat_message_join AS cm
                    ON cm.chat_id = c.ROWID
                INNER JOIN message AS m
                    ON m.ROWID = cm.message_id
                WHERE c.chat_identifier = ?;
            """, (EPOCH, EPOCH, chat_id)).fetchone()

            # Chats without any message are not listed
            if numRows == 0:
                continue

            print(" %s (%s messages, %s to %s)" % (chat_id, numRows, firstDate, lastDate))
    finally:
        db.close()
def _format_byte_count(total_bytes):
    """Return total_bytes with thousands separators; a NULL size counts as 0."""
    return format(total_bytes or 0, ',d')


def _sender_label(row, prettyID, groupChat):
    """Return "me" for outgoing messages, else the handle or the pretty ID."""
    # Compare by value: identity ("is 1") only works by interpreter accident
    if row[1] == 1:
        return "me"
    return row[3] if groupChat else prettyID


def _attachment_markup(attachFilename, attachGUID, attachName, mimeType):
    """Copy one attachment into ATTACHDIR and return the HTML that embeds it.

    Returns an escaped "Attachment not found" marker, without copying
    anything, when the file referenced by the database is missing on disk.
    """
    attachment = path.expanduser(attachFilename)
    if not path.exists(attachment):
        return cgi.escape("<Attachment not found: " + attachFilename + ">")

    # Create the attachment dir if it doesn't exist
    try:
        os.makedirs(ATTACHDIR)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

    if mimeType is None:
        mimeType = "unknown"

    # The transfer name may be NULL, and it is stored data used as a path
    # component here, so keep only its final component.
    attachName = path.basename(attachName or attachment)
    newFile = ATTACHDIR + "/" + (attachGUID or "") + "-" + attachName
    newFile = newFile.replace(" ", "_")
    copy2(attachment, newFile)

    # These values land inside double-quoted attributes, so quotes are
    # escaped as well (second argument of cgi.escape).
    fields = {
        "label": cgi.escape(attachName),
        "id": cgi.escape(attachName, True),
        "src": cgi.escape(newFile, True),
        "mime": cgi.escape(mimeType, True),
    }
    if "video/" in mimeType:
        template = ("Video: %(label)s<BR><DIV ALIGN=\"center\">"
                    "<video width=\"800\" height=\"540\" controls preload=\"none\">"
                    "<source id=\"%(id)s\" src=\"%(src)s\" mimeType=\"%(mime)s\">"
                    "</video></DIV>")
    elif "audio/" in mimeType:
        # Exactly one src attribute: with two, the first one (the bare
        # transfer name) wins and the player points at a missing file.
        template = ("Audio: %(label)s<BR><DIV ALIGN=\"center\">"
                    "<audio controls preload=\"none\">"
                    "<source id=\"%(id)s\" src=\"%(src)s\" mimeType=\"%(mime)s\">"
                    "</audio></DIV>")
    else:
        template = ("<a href=\"%(src)s\" target=\"_blank\" border=\"0\">"
                    "<img id=\"%(id)s\" src=\"%(src)s\" mimetype=\"%(mime)s\""
                    " width=\"150\" align=\"top\"></a><br>")
    return template % fields


def _print_text_transcript(rows, chatTitle, chatEnd, prettyID, groupChat, chatPadding):
    """Print the conversation as plain text framed by starred banners."""
    stars = "*" * (len(chatTitle) + 4)
    print((stars + "\n* " + chatTitle + " *\n" + stars).encode("utf8"))

    for row in rows:
        # Skip this message if the text is blank (happens occasionally)
        if row[2] is None:
            continue

        # Pad the sender so the columns line up
        who = _sender_label(row, prettyID, groupChat).rjust(chatPadding, ' ')
        text = row[2].strip()

        # Attachments are never kept in text mode; leave a marker instead
        if row[4]:
            text = text + "<Attachment removed: " + row[4] + " (Bytes: " + _format_byte_count(row[6]) + " KB)>"

        line = "%s @ %s: %s" % (who, row[0], text)
        print(line.encode("utf8"))

    stars = "*" * (len(chatEnd) + 4)
    print((stars + "\n* " + chatEnd + " *\n" + stars + "\n").encode("utf8"))


def _print_html_transcript(rows, chatTitle, chatEnd, prettyID, groupChat, chatPadding, keepAttachment):
    """Print the conversation as HTML, one block per calendar date."""
    printHTMLHeader()
    print("<div class=\"message date1\">")
    print(("<div align=\"center\"><b>" + cgi.escape(chatTitle) + "</b></div>").encode("utf8"))

    prevDate = ""
    for row in rows:
        # Skip this message if the text is blank (happens occasionally)
        if row[2] is None:
            continue

        # "YYYY-MM-DD HH:MM:SS": the date part keeps its trailing space
        stamp = row[0]
        day = stamp[:11]
        clock = stamp[11:]

        who = cgi.escape(_sender_label(row, prettyID, groupChat).rjust(chatPadding, ' '))

        # Message text is always escaped; attachment markup, if any, is
        # appended afterwards so its tags survive.
        text = cgi.escape(row[2].strip())
        attachFilename = row[4]
        if attachFilename:
            if keepAttachment:
                text = text + _attachment_markup(attachFilename, row[7], row[8], row[5])
            else:
                text = text + cgi.escape(
                    "<Attachment removed: " + attachFilename
                    + " (Bytes: " + _format_byte_count(row[6]) + " KB)>")

        if day == prevDate:
            # Same date as the previous message: just user, time and text
            line = "<b>%s @ %s:</b> %s " % (who, clock, text)
        else:
            # New date: close the previous block and start a new one
            line = "</div><div class=\"message date\"><b>%s</b><hr><b>%s @ %s:</b> %s" % (day, who, clock, text)

        print(line.encode("utf8"))
        prevDate = day

    # End of the current chat transcript
    print(("</div><div class=\"message\" align=\"center\"><b>" + cgi.escape(chatEnd) + "</b></div>").encode("utf8"))


def exportID(chatFile, chat_id, prettyID, HTML, separate, keepAttachment):
    """Export a single conversation as HTML or plain text.

    Args:
        chatFile: Path to the Messages SQLite database.
        chat_id: chat.chat_identifier of the conversation to export.
        prettyID: Display name for the other party; when empty, chat_id is
            used instead.
        HTML: Truthy for HTML output, falsy for plain text.
        separate: Truthy to write to "Export-<chat_id><ext>" in the working
            directory instead of the current standard output.
        keepAttachment: Truthy to copy attachments into ATTACHDIR and embed
            them (HTML output only; text output always prints a marker).

    Prints "No chat logs found" and returns without creating any file when
    the identifier has no messages. On return sys.stdout is whatever it was
    on entry, and a file opened for separate output has been closed.
    """
    # Fall back to the raw identifier when no display name was supplied
    if len(prettyID) == 0:
        prettyID = chat_id

    # See if this is a group chat instead of an individual conversation.
    # NOTE(review): this keys off the display ID, so a pretty ID supplied
    # for a group chat turns the export into an individual one -- confirm
    # that this is intended.
    groupChat = prettyID[:4] == "chat"

    # Date of the first (ASC) or last (DESC) message of the conversation
    edgeDateSQL = """
        SELECT substr(datetime(m.date + ?, 'unixepoch', 'localtime'), 0, 11)
        FROM chat as c
        INNER JOIN chat_message_join AS cm
            ON cm.chat_id = c.ROWID
        INNER JOIN message AS m
            ON m.ROWID = cm.message_id
        WHERE c.chat_identifier = ?
        ORDER BY m.date %s LIMIT 1;
    """

    db = sqlite3.connect(chatFile)
    try:
        cursor = db.cursor()

        # First make sure data actually exists for this chat_id
        row = cursor.execute(edgeDateSQL % "ASC", (EPOCH, chat_id)).fetchone()
        if row is None:
            print("No chat logs found for '%s'!" % (chat_id))
            return
        firstDate = row[0]
        lastDate = cursor.execute(edgeDateSQL % "DESC", (EPOCH, chat_id)).fetchone()[0]

        # Grab all the rows for this chat_id. Column order:
        # 0 date, 1 is_from_me, 2 text, 3 handle id, 4 attachment filename,
        # 5 mime type, 6 total bytes, 7 attachment guid, 8 transfer name,
        # 9 message ROWID
        rows = cursor.execute("""
            SELECT datetime(m.date + ?, 'unixepoch', 'localtime') as fmtdate,
                   m.is_from_me,
                   m.text,
                   h.id,
                   a.filename,
                   a.mime_type,
                   a.total_bytes,
                   a.guid,
                   a.transfer_name,
                   m.ROWID
            FROM chat as c
            INNER JOIN chat_message_join AS cm
                ON cm.chat_id = c.ROWID
            INNER JOIN message AS m
                ON m.ROWID = cm.message_id
            INNER JOIN handle as h
                ON h.ROWID = m.handle_ID
            LEFT JOIN message_attachment_join AS ma
                ON ma.message_id = m.ROWID
            LEFT JOIN attachment as a
                ON a.ROWID = ma.attachment_id
            WHERE c.chat_identifier = ?
            ORDER BY m.date;
        """, (EPOCH, chat_id)).fetchall()

        # Get the handles that are part of the chat
        handles = []
        if groupChat:
            handles = [handle[0] for handle in cursor.execute("""
                SELECT DISTINCT h.id
                FROM chat as c
                INNER JOIN chat_message_join AS cm
                    ON cm.chat_id = c.ROWID
                INNER JOIN message AS m
                    ON m.ROWID = cm.message_id
                INNER JOIN handle as h
                    ON h.ROWID = m.handle_id
                WHERE c.chat_identifier = ?
                ORDER BY h.id;
            """, (chat_id,))]
    finally:
        db.close()

    if groupChat:
        chat_handles = ", ".join(handles)
        if len(chat_handles) == 0:
            chat_handles = "NOBODY"
        # Pad senders to the longest handle
        chatPadding = max([0] + [len(handle) for handle in handles])
        chatTitle = "Group conversation with %s from %s to %s" % (chat_handles, firstDate, lastDate)
        chatEnd = "End of group conversation with %s" % (chat_handles)
    else:
        chatPadding = len(prettyID)
        chatTitle = "Chat transcript with %s from %s to %s" % (prettyID, firstDate, lastDate)
        chatEnd = "* End of chat transcript with %s *" % (prettyID)

    # Output goes through print, so a per-chat file is installed as
    # sys.stdout for the duration of the export only.
    oldstdout = sys.stdout
    outFile = None
    if separate:
        logExt = ".html" if HTML else ".txt"
        outFile = open("Export-" + chat_id.replace("+", "") + logExt, 'w')
        sys.stdout = outFile

    try:
        if HTML:
            _print_html_transcript(rows, chatTitle, chatEnd, prettyID, groupChat, chatPadding, keepAttachment)
        else:
            _print_text_transcript(rows, chatTitle, chatEnd, prettyID, groupChat, chatPadding)
    finally:
        sys.stdout = oldstdout
        if outFile is not None:
            outFile.close()
# | |
# exportAll strictly loops through all the chat conversations and calls | |
# exportID accordingly | |
# | |
def exportAll(chatFile, HTML, separate, keepAttachment):
    """Export every non-archived conversation by calling exportID for each.

    Args:
        chatFile: Path to the Messages SQLite database.
        HTML: Truthy for HTML output, falsy for plain text.
        separate: Truthy to write each conversation to its own file.
        keepAttachment: Truthy to copy and embed attachments (HTML only).
    """
    oldstdout = sys.stdout

    # Read the identifiers up front so the connection can be closed before
    # the (potentially long) exports start.
    db = sqlite3.connect(chatFile)
    try:
        rows = db.execute("""
            SELECT DISTINCT chat_identifier
            FROM chat WHERE is_archived = 0 ORDER BY chat_identifier;
        """).fetchall()
    finally:
        db.close()

    # Loop through each ID and export to a file/stdout
    for row in rows:
        try:
            exportID(chatFile, row[0], "", HTML, separate, keepAttachment)
        finally:
            # exportID may leave a per-chat file installed as sys.stdout;
            # close it and restore the real stream after every chat, even
            # when the export raised, so no handle leaks and later output
            # does not land in the previous chat's file.
            if sys.stdout is not oldstdout:
                perChatFile = sys.stdout
                sys.stdout = oldstdout
                perChatFile.close()
def printHTMLHeader():
    """Print the charset declaration and the stylesheet used by HTML exports."""
    header_lines = [
        "",
        "<meta charset=\"utf-8\">",
        "<style>",
        "body { margin: 0; padding: 0; }",
        ".message {",
        "white-space: pre-wrap;",
        "max-width: 800px;",
        "padding: 10px;",
        "margin: 10px;",
        "font-family: \"Courier\", Calibri, Tahoma;",
        "font-size: 14px;",
        "}",
        ".date { background-color: #EEE; }",
        ".date1 { background-color: #A6DBFF; }",
        "</style>",
        "",
    ]
    # The empty first and last entries reproduce the blank line before the
    # markup and the extra newline after it.
    print("\n".join(header_lines))
def usage():
    """Print the command-line help text to standard output."""
    # The attachment directory is spelled exactly like the directory the
    # export creates ("export-Attachments").
    print("""
Usage:
    %s [-hlastk] [--file <filename>] [--id <chat id>] [--prettyID <pretty ID>]

    -h, --help      Show this message
    -f, --file      Alternate SQLite DB to use
    -l, --list      List all non-archived chats
    -a, --all       Export all non-archived chats
    -s, --separate  During export, write chat to a separate file
    -c, --id        Specify the ID to export
    -p, --prettyID  Specify the pretty ID to use when exporting a single chat
    -t              Output chat in TXT format instead of HTML
    -k              Keep attachments (stored in ./export-Attachments)
""" % (__file__))
def main():
    """Parse the command line and run the requested listing or export.

    Exits with status 2 on invalid or missing options, 1 when the database
    file does not exist, and 0 otherwise.
    """
    chatFile = CHAT_DB
    chat_id = None
    prettyID = ""
    exportEverything = False
    HTML = 1
    separate = 0
    listchats = False
    keepAttachment = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], "hf:altsc:p:k",
            ["help", "file=", "all", "list", "separate", "id=", "prettyID="])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    # If no args are given, show usage
    if len(sys.argv) == 1:
        usage()
        sys.exit()

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-a", "--all"):
            exportEverything = True
        elif opt in ("-l", "--list"):
            listchats = True
        elif opt in ("-c", "--id"):
            chat_id = arg
        elif opt in ("-p", "--prettyID"):
            prettyID = arg
        elif opt == "-t":
            HTML = 0
        elif opt in ("-s", "--separate"):
            separate = 1
        elif opt in ("-f", "--file"):
            chatFile = arg
        elif opt == "-k":
            # Plain equality: `opt in ("-k")` would be a substring test
            keepAttachment = 1
        else:
            # getopt only returns options declared above
            raise RuntimeError("unhandled option: " + opt)

    if not os.path.isfile(chatFile):
        print("'" + chatFile + "' not found!")
        sys.exit(1)

    if listchats:
        list_chats(chatFile)
        sys.exit()

    # Make sure a required argument was used
    if not exportEverything and chat_id is None:
        print("You need to specify either [-c] or [-a]")
        usage()
        sys.exit(2)

    # See if we're exporting all, or just one conversation
    if exportEverything:
        exportAll(chatFile, HTML, separate, keepAttachment)
    else:
        # exportID prints the HTML header itself when HTML output is
        # selected, so no header is printed here; doing so would duplicate
        # it and put HTML into plain-text exports.
        exportID(chatFile, chat_id, prettyID, HTML, separate, keepAttachment)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment