Last active
May 17, 2021 07:17
-
-
Save seanbreckenridge/9a5532561bfe24268c368366e5360cfe to your computer and use it in GitHub Desktop.
Cleans up my MAL export so that it's importable to AniList
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# Cleans up my MAL export according to this thread:
# https://anilist.co/forum/thread/3291
# so that it's importable.
# Otherwise the GraphQL request fails with a 413 Request Entity Too Large,
# since the AniList page just inlines the XML contents into the GraphQL query.
import sys | |
import typing | |
from pathlib import Path | |
import lxml.etree as ET | |
import click | |
# Child tags of each <anime> element that are kept in the cleaned export;
# every other child tag is removed by fix().
KEEP_ATTRS = {
    "series_animedb_id",
    "series_title",
    "my_watched_episodes",
    "my_start_date",
    "my_finish_date",
    "my_score",
    "my_status",
}
def fix(from_file: Path) -> bytes:
    """Strip a MAL XML export down to the tags listed in KEEP_ATTRS.

    Parses ``from_file``, removes the top-level ``<myinfo>`` element when
    one is present, and removes every child of each ``<anime>`` element
    whose tag is not in ``KEEP_ATTRS``.

    Returns the root element serialized with ``ET.tostring`` using its
    default arguments (bytes, per the lxml documentation).
    """
    tree = ET.parse(str(from_file))
    root = tree.getroot()

    # find() returns None when the export has no <myinfo>; remove(None)
    # would raise, so only remove it when it exists.  Compare against None
    # explicitly: an element without children is falsy.
    myinfo = root.find("myinfo")
    if myinfo is not None:
        root.remove(myinfo)

    for anime_tag in root.findall("anime"):
        # Iterate over a snapshot of the children so that removing one
        # cannot disturb the traversal.
        for anime_info in list(anime_tag):
            if anime_info.tag not in KEEP_ATTRS:
                anime_tag.remove(anime_info)
    return ET.tostring(root)
@click.command()
@click.argument("XML_FILE")
def main(xml_file: str) -> None:
    """
    pass the xml file from MAL as the first argument
    prints results to STDOUT
    """
    # Validate with an explicit check instead of `assert`, which is removed
    # when Python runs with -O; click reports BadParameter as a usage error.
    if not xml_file.endswith(".xml"):
        raise click.BadParameter(
            "Pass the xml file, not the .xml.gz file", param_hint="XML_FILE"
        )
    click.echo(fix(Path(xml_file)))
# Run the CLI only when executed as a script; prog_name sets the program
# name click shows in its usage/help output.
if __name__ == "__main__":
    main(prog_name="anilist_413")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Still times out...