@camilosampedro
Created July 24, 2024 11:35
Bitwarden Json export cleanup script
import json
from datetime import datetime

input_file = 'bitwarden_export_original.json'
output_file = 'output.json'

# These fields are ignored when comparing whether two entries are the same:
# - ids are unique, so they always differ
# - revisionDate and creationDate tend to be slightly apart between duplicate entries
columns_to_ignore = ["id", "revisionDate", "creationDate"]
# Remove duplicate entries from a Bitwarden JSON export object.
def remove_duplicates(json_obj):
    # Record of visited elements, keyed by their comparable representation.
    seen = {}
    for item in json_obj["items"]:
        # Build a comparison key from every field except the ignored ones.
        item_str = str({k: v for k, v in item.items() if k not in columns_to_ignore})
        if item_str not in seen:
            # First time we see this entry.
            seen[item_str] = item
        else:
            # Duplicate: keep whichever copy has the later revisionDate.
            # The compared fields are identical at this point, so this may not be
            # strictly needed, but it preserves the later date for the record.
            existing_item = seen[item_str]
            existing_date = datetime.fromisoformat(existing_item["revisionDate"].replace("Z", "+00:00"))
            new_date = datetime.fromisoformat(item["revisionDate"].replace("Z", "+00:00"))
            if new_date > existing_date:
                seen[item_str] = item
    unique_items = list(seen.values())
    discarded_count = len(json_obj["items"]) - len(unique_items)
    json_obj["items"] = unique_items
    print(f"Discarded {discarded_count} duplicate items. Total items: {len(unique_items)}")
    return json_obj
with open(input_file) as f:
    json_obj = json.load(f)

json_obj = remove_duplicates(json_obj)

with open(output_file, 'w') as f:
    json.dump(json_obj, f, indent=4)
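
A quick way to sanity-check the deduplication logic is to feed remove_duplicates a tiny in-memory payload before running it against a real export. The snippet below is a minimal sketch, assuming it runs in the same Python session as the script above (the two item dicts are hypothetical and only contain the fields the script actually reads):

# Two hypothetical entries that differ only in the ignored fields;
# the copy with the later revisionDate should win.
sample = {
    "items": [
        {"id": "a", "name": "example.com", "revisionDate": "2024-07-01T10:00:00Z", "creationDate": "2024-07-01T10:00:00Z"},
        {"id": "b", "name": "example.com", "revisionDate": "2024-07-02T10:00:00Z", "creationDate": "2024-07-02T10:00:00Z"},
    ]
}
deduped = remove_duplicates(sample)
assert len(deduped["items"]) == 1
assert deduped["items"][0]["id"] == "b"  # the later revision was kept

For a real run, the input should be Bitwarden's unencrypted JSON export saved as bitwarden_export_original.json next to the script; the deduplicated vault is written to output.json, which should be importable back through Bitwarden's JSON import.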