Created
August 13, 2019 04:27
-
-
Save bbkane/1933735510ff7f44620cf5c12b39cc2c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# This is just to find out how Firefox bookmarks.json files are structured
__author__ = "Benjamin Kane" | |
__version__ = "0.1.0" | |
from collections import Counter, defaultdict | |
from functools import reduce | |
import json | |
import operator as op | |
def collect_types(counter, obj):
    """Tally every (type, typeCode) pair found in a bookmark tree.

    Args:
        counter: Mapping that supports ``counter[key] += 1`` for unseen keys
            (e.g. a ``collections.Counter``). It is updated in place.
        obj: Root node of the tree. Each node is a dict with 'type' and
            'typeCode' entries and an optional 'children' list.

    Returns:
        The same ``counter`` object that was passed in.
    """
    todo = [obj]
    while todo:
        node = todo.pop()
        pair = (node['type'], node['typeCode'])
        counter[pair] += 1
        # Push children reversed so they are popped in their original order,
        # i.e. nodes are counted parent first, then each child's subtree.
        todo.extend(reversed(node.get('children', [])))
    return counter
def breadth_first(obj):
    """Lazily yield ``obj`` and then every node beneath it.

    Note: despite the name, the order produced is pre-order depth-first:
    a node is yielded, then the complete subtree of its first child, then
    the subtree of its second child, and so on.

    Args:
        obj: Root node; a dict whose optional 'children' entry is an
            iterable of further nodes.

    Yields:
        Each node of the tree, parents before their descendants.
    """
    # Stack of child iterators; the top one belongs to the deepest node whose
    # children are still being walked.
    pending = [iter((obj,))]
    while pending:
        for node in pending[-1]:
            yield node
            # Descend: this node's children are handled before its siblings.
            pending.append(iter(node.get('children', [])))
            break
        else:
            # Top iterator is exhausted; resume with the parent's siblings.
            pending.pop()
def depth_first(obj):
    """Lazily yield every node of a tree in post-order.

    All descendants of a node are yielded before the node itself, so the
    root comes last.

    Args:
        obj: Root node; a dict whose optional 'children' entry is an
            iterable of further nodes.

    Yields:
        Each node of the tree, children before their parent.
    """
    for child in obj.get('children', []):
        # Recurse into depth_first itself so the children-first order holds
        # at every level of the tree, not only at the root.
        yield from depth_first(child)
    yield obj
def categorize_keys(bookmarks):
    """Print which keys the nodes of each 'type' carry.

    Every node reached through ``breadth_first`` is grouped by its 'type'
    value. For each group this prints the union of all key sets seen, their
    intersection, and the difference between the two ("outliers"). Finally
    the intersection across all groups is printed.

    Args:
        bookmarks: Root node of the bookmark tree (a dict with a 'type' key
            and an optional 'children' list).
    """
    # type -> Counter of the distinct key sets seen on nodes of that type
    key_sets_by_type = defaultdict(Counter)
    for node in breadth_first(bookmarks):
        key_sets_by_type[node['type']][frozenset(node.keys())] += 1

    shared_per_type = []
    for type_, key_sets in key_sets_by_type.items():
        distinct_sets = key_sets.keys()
        print(type_)
        every_key = reduce(op.or_, distinct_sets)
        print(f' union: {every_key}')
        always_present = reduce(op.and_, distinct_sets)
        shared_per_type.append(always_present)
        print(f' intersection: {always_present}')
        print(f' outliers: {every_key - always_present}')

    # Keys that appear on every node regardless of its type.
    overall = reduce(op.and_, shared_per_type)
    print(f'total intersection: {overall}')
# Okay, this isn't working right now... | |
def translate(bookmarks, folder_titles, acc):
    """Collect one record per bookmark, tagged with its enclosing folders.

    Walks the tree rooted at ``bookmarks``. For every node of type
    'text/x-moz-place' a dict ``{'title': ..., 'tags': ...}`` is appended to
    ``acc``, where 'tags' is a set holding the titles of all
    'text/x-moz-place-container' nodes on the path down to that node.
    Nodes of any other type contribute nothing themselves, but their
    children (if any) are still visited.

    Args:
        bookmarks: Current node; a dict with 'type' and 'title' entries and
            an optional 'children' list.
        folder_titles: Iterable of container titles gathered so far. It is
            never modified.
        acc: List that receives the records, in traversal order.

    Returns:
        None. Results are delivered through ``acc``.
    """
    node_type = bookmarks['type']

    if node_type == 'text/x-moz-place':
        # Hand each record its own set, so that records never alias one
        # another or the caller's ``folder_titles``.
        acc.append({'title': bookmarks['title'], 'tags': set(folder_titles)})
        return

    if node_type == 'text/x-moz-place-container':
        # Build a fresh set rather than calling .add() on the argument, so
        # the caller's collection is left untouched.
        folder_titles = set(folder_titles) | {bookmarks['title']}

    for child in bookmarks.get('children', []):
        translate(child, folder_titles, acc)
def main(path='./bookmarks-2019-05-28.json'):
    """Load a Firefox bookmarks JSON file and print the translated records.

    Args:
        path: Location of the JSON file to read. Defaults to the export this
            script was originally written against.
    """
    # Pin the encoding: JSON text is UTF-8, whereas open() would otherwise
    # fall back to the platform's locale encoding.
    with open(path, encoding='utf-8') as fp:
        bookmarks = json.load(fp)

    # Exploration notes, kept for reference.
    #
    # counter = collect_types(Counter(), bookmarks)
    # # Counter({('text/x-moz-place', 1): 1413,
    # #          ('text/x-moz-place-container', 2): 59,
    # #          ('text/x-moz-place-separator', 3): 7})
    # So we know that 'typeCode' corresponds to 'type'.
    #
    # Let's see what keys each object contains, sorted by type:
    # categorize_keys(bookmarks)
    # # text/x-moz-place-container
    # #  union: frozenset({'lastModified', 'id', 'title', 'type', 'typeCode', 'dateAdded', 'guid', 'index', 'children', 'annos', 'root'})
    # #  intersection: frozenset({'lastModified', 'dateAdded', 'id', 'guid', 'index', 'title', 'type', 'typeCode'})
    # #  outliers: frozenset({'children', 'annos', 'root'})
    # # text/x-moz-place
    # #  union: frozenset({'lastModified', 'id', 'title', 'charset', 'uri', 'iconuri', 'type', 'typeCode', 'dateAdded', 'guid', 'index', 'tags', 'annos'})
    # #  intersection: frozenset({'lastModified', 'dateAdded', 'id', 'guid', 'index', 'title', 'uri', 'type', 'typeCode'})
    # #  outliers: frozenset({'tags', 'charset', 'iconuri', 'annos'})
    # # text/x-moz-place-separator
    # #  union: frozenset({'lastModified', 'dateAdded', 'id', 'guid', 'index', 'title', 'type', 'typeCode'})
    # #  intersection: frozenset({'lastModified', 'dateAdded', 'id', 'guid', 'index', 'title', 'type', 'typeCode'})
    # #  outliers: frozenset()
    # categorize_keys(bookmarks)

    acc = []
    translate(bookmarks, set(), acc)
    for item in acc:
        print(item)
# Run the exploration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment