Skip to content

Instantly share code, notes, and snippets.

@gmelodie
Created August 6, 2024 23:09
Show Gist options
  • Save gmelodie/0851232ddae68743d043066f11907d4f to your computer and use it in GitHub Desktop.
Save gmelodie/0851232ddae68743d043066f11907d4f to your computer and use it in GitHub Desktop.
Remove entire publications from ACM Digital Library's exported .bib file
from pybtex.database.input import bibtex
from pybtex.database.output.bibtex import Writer
from pybtex.errors import set_strict_mode
import sys
# Name of the cleaned-up bibliography, written to the current working directory.
OUTPUT_FILENAME = "acm-updated.bib"


def find_authorless_keys(entries):
    """Return the cite keys of all entries that have no authors.

    Args:
        entries: Mapping of cite key -> entry. Each entry must expose a
            ``persons`` mapping (role -> list of persons).

    Returns:
        A list of cite keys, in iteration order, whose entry either has no
        ``'author'`` role at all or has an empty author list.
    """
    return [
        cite_key
        for cite_key, entry in entries.items()
        if 'author' not in entry.persons or not entry.persons['author']
    ]


def entry_label(cite_key, entry):
    """Return a short label for *entry* to use in progress messages.

    The label is the part of the title before the first ``':'``. Entries
    without a (non-empty) title fall back to their cite key instead of
    raising ``KeyError``.
    """
    title = entry.fields['title'] if 'title' in entry.fields else ''
    if not title:
        return cite_key
    return title.split(':')[0]


def main(argv):
    """Strip author-less entries from the .bib file named in *argv*.

    Args:
        argv: Command-line arguments including the program name, i.e.
            ``[prog, path_to_bib]``.

    Returns:
        Process exit status: 0 on success, 1 on incorrect usage.
    """
    if len(argv) != 2:
        print('usage: python3 detrashify_acm.py /path/to/acm.bib')
        return 1

    filename = argv[1]
    print(f'Detrashifying {filename}')

    # allow duplicate bibs
    set_strict_mode(False)

    # parse acm.bib
    parser = bibtex.Parser()
    bib_data = parser.parse_file(filename)
    old_len = len(bib_data.entries)

    # Collect the keys first, then delete: the mapping must not be mutated
    # while it is being iterated.
    keys_to_remove = find_authorless_keys(bib_data.entries)
    for key in keys_to_remove:
        print(f"Filtering out {entry_label(key, bib_data.entries[key])}")
        del bib_data.entries[key]

    writer = Writer()
    with open(OUTPUT_FILENAME, 'w', encoding='utf-8') as file:
        writer.write_stream(bib_data, file)

    new_len = len(bib_data.entries)
    print(f'excluded {len(keys_to_remove)} entries without authors')
    print(f'Old len: {old_len}\t New len: {new_len}')
    print(f'Updated file: {OUTPUT_FILENAME}')
    return 0


if __name__ == '__main__':
    # sys.exit rather than the site-provided exit(), which is meant for the
    # interactive shell and is not guaranteed to be defined.
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment