Last active
June 30, 2020 21:41
-
-
Save lazymutt/77f1cc68dbd7da03a9d9b19ffd97c946 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
# Copyright (c) 2019 University of Utah Student Computing Labs. ################ | |
# All Rights Reserved. | |
# | |
# Permission to use, copy, modify, and distribute this software and | |
# its documentation for any purpose and without fee is hereby granted, | |
# provided that the above copyright notice appears in all copies and | |
# that both that copyright notice and this permission notice appear | |
# in supporting documentation, and that the name of The University | |
# of Utah not be used in advertising or publicity pertaining to | |
# distribution of the software without specific, written prior | |
# permission. This software is supplied as is without expressed or | |
# implied warranties of any kind. | |
################################################################################ | |
# firefox_tab_extractor.py ##################################################### | |
# 11/15/17, v1.1 | |
# | |
# added lz4 decompression support. | |
# https://github.com/avih/dejsonlz4 | |
# | |
# | |
# 04/17/17, v1.0, todd.mcdaniel@utah.edu | |
# | |
# 07/23/19, v1.1, todd.mcdaniel@utah.edu | |
# python3 | |
# | |
# 07/23/19, v1.1.1, todd.mcdaniel@utah.edu | |
# new profile organization navigation, seems brittle? | |
# | |
# 10/15/19, v1.2, todd.mcdaniel@utah.edu | |
# stronger profile searching | |
# | |
# | |
# | |
# | |
# | |
# I open way too many tabs. There, I said it. Admitting the problem is the | |
# first step, programming your way out of it is the second. :) When I realize | |
# that Firefox is slowing my machine to a crawl, it's then difficult and | |
# time-consuming to clean up several hundred (thousand?) tabs. I wanted a solution to | |
# backup everything and start fresh. | |
# | |
# This script will read Firefox's sessionstore.js and save each open window, | |
# each tab from that window, and the history of URLs visited in that tab. | |
# | |
# Usage: | |
# | |
# To process a specific file: | |
# | |
# firefox_tab_extractor.py ~/someplace/something.js | |
# | |
# or use the script without a path to have it decide which sessionstore to use: | |
# | |
# The script will use [profile]/sessionstore.js if Firefox is not running,
# or [profile]/sessionstore-backups/recovery.js, if it is. | |
# | |
# In either case, the script will output the results to the running user's Desktop folder,
# i.e. ~/Desktop/firefox_tabs_04172017_123456_yourhostname_profileid.txt
# | |
# Sample output: | |
# | |
# Window #0000 Tab #0000 Entry #0000 Title: Mozilla Firefox Start Page | |
# URL: about:home | |
# Entry #0001 Title: sessionstore.js - Google Search | |
# URL: https://www.google.com/search?q=google&ie=utf-8&oe=utf-8 | |
# Entry #0002 Title: sessionstore.js - Google Search | |
# URL: https://www.google.com/search?q=google&ie=utf-8&oe=utf-8#q=sessionstore.js | |
# Tab #0001 Entry #0000 Title: Apple | |
# URL: https://www.apple.com/ | |
# Tab #0002 Entry #0000 URL: https://github.com/ | |
# | |
# | |
################################################################################ | |
# notes: ####################################################################### | |
# | |
# | |
# | |
# ~/Library/Application Support/Firefox/Profiles/z7ghhgr6.default/sessionstore-backups 12:39pm u0942941@t-mcdaniel-mac #161 >dir | |
# total 42048 | |
# drwx------+ 14 u0942941 staff 476 Nov 15 12:32 ./ | |
# drwx------+ 82 u0942941 staff 2788 Nov 15 12:37 ../ | |
# -rw-------@ 1 u0942941 staff 3012531 Oct 2 17:49 previous.js | |
# -rw-------@ 1 u0942941 staff 1111182 Nov 15 12:10 previous.jsonlz4 | |
# -rw-------@ 1 u0942941 staff 3035348 Oct 2 17:46 recovery.bak | |
# -rw-------@ 1 u0942941 staff 1114914 Nov 15 12:31 recovery.baklz4 | |
# -rw-------@ 1 u0942941 staff 3013282 Oct 2 17:50 recovery.js | |
# -rw-------@ 1 u0942941 staff 1114334 Nov 15 12:32 recovery.jsonlz4 <-------------<< | |
# -rw-------@ 1 u0942941 staff 1877893 Aug 11 11:33 upgrade.js-20170809080250 | |
# -rw-------@ 1 u0942941 staff 2215852 Aug 23 17:05 upgrade.js-20170814073321 | |
# -rw-------@ 1 u0942941 staff 2588636 Aug 29 10:13 upgrade.js-20170824053838 | |
# -rw-------@ 1 u0942941 staff 673837 Oct 3 10:30 upgrade.jsonlz4-20170926190823 | |
# -rw-------@ 1 u0942941 staff 683921 Oct 26 14:43 upgrade.jsonlz4-20171024165158 | |
# -rw-------@ 1 u0942941 staff 1063180 Nov 15 10:49 upgrade.jsonlz4-20171112125346 | |
# | |
# | |
# | |
# /Users/u0942941/Library/Application Support/Firefox/Profiles/vx9orxd2.default-release/prefs.js | |
# /Users/u0942941/Library/Application Support/Firefox/Profiles/ewdkxc0y.default-release-2/prefs.js | |
# /Users/u0942941/Library/Application Support/Firefox/Profiles/z7ghhgr6.default/sessionstore-backups/previous.js | |
# /Users/u0942941/Library/Application Support/Firefox/Profiles/z7ghhgr6.default/sessionstore-backups/recovery.js | |
# /Users/u0942941/Library/Application Support/Firefox/Profiles/z7ghhgr6.default/prefs.js | |
# /Users/u0942941/Library/Application Support/Firefox/Profiles/gbnju89j.default-release-1/prefs.js | |
# | |
# | |
# | |
# [Profile2] | |
# Name=default-release-1 | |
# IsRelative=1 | |
# Path=Profiles/bt1l76mw.default-release-1 | |
# | |
# [Profile1] | |
# Name=default-release | |
# IsRelative=1 | |
# Path=Profiles/xopmy9ah.default-release | |
# | |
# [Profile0] | |
# Name=default | |
# IsRelative=1 | |
# Path=Profiles/cc09pjyv.default | |
# Default=1 | |
# | |
# [General] | |
# StartWithLastProfile=1 | |
# Version=2 | |
# | |
# [Install428590CF97E53F58] | |
# Default=Profiles/bt1l76mw.default-release-1 | |
# Locked=1 | |
# | |
# [Install7DDF5FF9633F2A77] | |
# Default=Profiles/xopmy9ah.default-release | |
# Locked=1 | |
# | |
# [InstallA45A70AC2D941EBC] | |
# Default=Profiles/cc09pjyv.default | |
# Locked=1 | |
# | |
# | |
# | |
# ./Profiles/bt1l76mw.default-release-1/sessionstore-backups: | |
# total 19808 | |
# drwx------@ 7 u0942941 staff 224 Sep 1 12:11 ./ | |
# drwx------@ 57 u0942941 staff 1824 Aug 31 20:46 ../ | |
# -rw-------@ 1 u0942941 staff 2476197 Aug 23 12:57 previous.jsonlz4 | |
# -rw-------@ 1 u0942941 staff 2881129 Sep 1 12:11 recovery.baklz4 | |
# -rw-------@ 1 u0942941 staff 2881140 Sep 1 12:11 recovery.jsonlz4 <---------<<< | |
# -rw-------@ 1 u0942941 staff 3576 Jul 19 12:07 upgrade.jsonlz4-20190717172542 | |
# -rw-------@ 1 u0942941 staff 1889566 Aug 16 11:42 upgrade.jsonlz4-20190813150448 | |
# | |
# | |
################################################################################ | |
from __future__ import print_function | |
import json | |
import sys | |
import os | |
import pwd | |
import socket | |
import time | |
import subprocess | |
import datetime | |
import pathlib | |
def _find_sessionstores(firefox_dir):
    """Return one session file per Firefox profile referenced in profiles.ini.

    Every ``key=value`` line of ``firefox_dir/profiles.ini`` whose text mentions
    ``Profiles`` is treated as a profile directory. For each directory the first
    existing file among ``sessionstore-backups/recovery.jsonlz4``,
    ``sessionstore.js`` and ``sessionstore-backups/recovery.js`` is selected.

    Raises:
        OSError: if profiles.ini cannot be opened or read.
    """
    profile_dirs = set()
    with open(os.path.join(firefox_dir, 'profiles.ini'), 'r') as ini_file:
        for line in ini_file:
            line = line.strip()
            if 'Profiles' in line and '=' in line:
                # Both "Path=..." and "Default=..." lines name profile folders;
                # the set removes the duplicates. os.path.join also copes with
                # an absolute path on the right-hand side.
                profile_dirs.add(os.path.join(firefox_dir, line.split('=', 1)[1]))

    candidates = (
        os.path.join('sessionstore-backups', 'recovery.jsonlz4'),
        'sessionstore.js',
        os.path.join('sessionstore-backups', 'recovery.js'),
    )
    found = []
    # Sorted so that the processing order is the same on every run.
    for profile_dir in sorted(profile_dirs):
        for relative_name in candidates:
            candidate = os.path.join(profile_dir, relative_name)
            if os.path.exists(candidate):
                found.append(candidate)
                break
    return found


def _profile_id(sessionstore_path):
    """Return a short, slash-free identifier used in the output file name."""
    if 'Profiles/' in sessionstore_path:
        # ".../Profiles/z7ghhgr6.default/..." -> "z7ghhgr6"
        profile_folder = sessionstore_path.split('Profiles/')[-1].split('/')[0]
        return profile_folder.split('.')[0]
    # A file given explicitly may live anywhere; fall back to its base name so
    # the identifier never contains a directory separator.
    return os.path.basename(sessionstore_path).split('.')[0]


def _load_sessionstore(sessionstore_path):
    """Parse a session file, decompressing it with dejsonlz4 when needed."""
    # Only the file name is inspected, so a directory that happens to contain
    # "lz4" does not trigger decompression of a plain JSON file.
    if 'lz4' in os.path.basename(sessionstore_path):
        raw_json = subprocess.check_output(['/usr/local/bin/dejsonlz4', sessionstore_path])
        return json.loads(raw_json)
    with open(sessionstore_path, encoding='utf-8') as data_file:
        return json.load(data_file)


def _write_report(session, output_file):
    """Write every window, tab and history entry of *session* to *output_file*."""
    for window_index, window in enumerate(session.get('windows', [])):
        window_label = "Window #{:04d} ".format(window_index)
        blank_window = " " * len(window_label)
        for tab_index, tab in enumerate(window.get('tabs', [])):
            tab_label = "Tab #{:04d} ".format(tab_index)
            blank_tab = " " * len(tab_label)
            for entry_index, entry in enumerate(tab.get('entries', [])):
                entry_label = "Entry #{:04d} ".format(entry_index)
                blank_overlap = blank_window + blank_tab + " " * len(entry_label)

                # The tab label is printed only on a tab's first entry, and the
                # window label only on the first entry of a window's first tab.
                first_entry = entry_index == 0
                window_col = window_label if first_entry and tab_index == 0 else blank_window
                tab_col = tab_label if first_entry else blank_tab

                url = entry.get('url', '')
                if 'title' in entry:
                    print("{}{}{}Title: {}\n{} URL: {}".format(
                        window_col, tab_col, entry_label, entry['title'], blank_overlap, url),
                        file=output_file)
                else:
                    print("{}{}{}URL: {}".format(window_col, tab_col, entry_label, url),
                          file=output_file)


def main(argv=None, output_dir=None):
    """Dump the open windows, tabs and tab histories of Firefox sessions to text files.

    Args:
        argv: command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. When the first argument is present it is the
            session file to process; otherwise the session files are located
            through the running user's Firefox ``profiles.ini``.
        output_dir: directory that receives the report files. Defaults to the
            running user's ``Desktop`` folder.

    Returns:
        The list of report paths that were written.

    Exits with status 1 when the given file does not exist or profiles.ini
    cannot be read, and with status 0 when the user declines to overwrite an
    existing report.
    """
    if argv is None:
        argv = sys.argv[1:]

    hostname = socket.gethostname().split(".")[0]

    # The home directory is only looked up when it is actually needed.
    user_dir = None
    if not argv or output_dir is None:
        user_dir = pwd.getpwuid(os.getuid())[5]
    if output_dir is None:
        output_dir = os.path.join(user_dir, 'Desktop')

    if argv:
        sessionstore_path = argv[0]
        if not os.path.exists(sessionstore_path):
            print("File doesn't exist. Exiting.")
            sys.exit(1)
        # The explicitly requested file is the only one to process.
        possible_sessionstores = [sessionstore_path]
    else:
        firefox_dir = os.path.join(user_dir, 'Library', 'Application Support', 'Firefox')
        try:
            possible_sessionstores = _find_sessionstores(firefox_dir)
        except OSError as this_exception:
            print(this_exception)
            sys.exit(1)

    print(len(possible_sessionstores))

    written_paths = []
    for sessionstore_path in possible_sessionstores:
        output_name = ('firefox_tabs_' + time.strftime("%m%d%Y_%H%M%S") + '_' + hostname
                       + '_' + _profile_id(sessionstore_path) + '.txt')
        output_path = os.path.join(output_dir, output_name)

        print("Using sessionstore: \033[4m%s\033[0m\n" % sessionstore_path)
        print("Output to: \033[4m%s\033[0m\n" % output_path)

        if os.path.exists(output_path):
            overwrite_check = input("File already exists, overwrite [Yy to continue]: ")
            if overwrite_check.lower() != "y":
                print("Exiting.")
                sys.exit()

        consumed_sessionstore = _load_sessionstore(sessionstore_path)

        # UTF-8 keeps non-ASCII page titles intact regardless of the locale;
        # backslashreplace stops an unpaired surrogate from aborting the dump.
        with open(output_path, 'w', encoding='utf-8', errors='backslashreplace') as output_file:
            _write_report(consumed_sessionstore, output_file)
        written_paths.append(output_path)

    return written_paths
# Run the extractor only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment