Skip to content

Instantly share code, notes, and snippets.

@3nws
Created August 16, 2024 21:29
Show Gist options
  • Save 3nws/fb0eaedc7fea2bd5d58c036a21913d57 to your computer and use it in GitHub Desktop.
Save 3nws/fb0eaedc7fea2bd5d58c036a21913d57 to your computer and use it in GitHub Desktop.
Script that fetches every chapter of an arc from witchculttranslation.com, builds an EPUB for each chapter, and uploads it to Dropbox.
import html5lib # type: ignore
import requests
import os
import dropbox
from ebooklib import epub
from bs4 import BeautifulSoup
from config import app_folder, app_key, app_secret, oauth2_refresh_token
# Shared HTTP session used for every page and image request in this script.
session = requests.session()

# Dropbox client built from the app credentials declared in config.
dbx = dropbox.Dropbox(
    app_key=app_key,
    app_secret=app_secret,
    oauth2_refresh_token=oauth2_refresh_token,
)

# Make sure the destination folder exists before any upload is attempted.
try:
    dbx.files_get_metadata(app_folder)
except dropbox.exceptions.ApiError as e:
    # A metadata lookup error is treated as "folder is missing" and the folder
    # is created. Any other API error is unexpected and must not be hidden.
    if isinstance(e.error, dropbox.files.GetMetadataError):
        # files_create_folder is deprecated in the SDK in favour of the _v2 call.
        dbx.files_create_folder_v2(app_folder)
    else:
        raise

# Arc number to scrape: it selects the index page below and is embedded in
# every generated EPUB file name.
arc = 9
chapters_url = f"https://witchculttranslation.com/arc-{arc}/"
def upload_to_dbox(epub_file_name):
    """Upload a local file into the Dropbox app folder under the same name.

    Args:
        epub_file_name: Path of the local file to read; it is also used as
            the file name inside ``app_folder`` on Dropbox.
    """
    destination = f"{app_folder}/{epub_file_name}"
    with open(epub_file_name, mode="rb") as epub_file:
        payload = epub_file.read()
        dbx.files_upload(payload, destination)
def create_epub(title, url, content, images, cover_url=None):
    """Build a single-chapter EPUB, write it to disk and upload it to Dropbox.

    Args:
        title: Chapter title; used as the book title and in the output
            file name.
        url: Chapter URL; its last non-empty path segment becomes the book
            identifier and the chapter's XHTML file name.
        content: Chapter body as an HTML string.
        images: Paths of local image files to embed. Each file is deleted
            from disk once it has been added to the book.
        cover_url: URL of the cover image. When it is None/empty, or the
            download does not succeed, the book is built without a cover.

    Raises:
        dropbox.exceptions.ApiError: If the upload fails with an error that
            is not a ``dropbox.files.UploadError``.
    """
    book = epub.EpubBook()

    chapter_slug = [part for part in url.split("/") if part][-1]
    book.set_identifier(chapter_slug)
    book.set_title(title)
    book.set_language("en")

    # The cover is optional: the signature defaults cover_url to None, so do
    # not attempt a request without a URL, and never embed an HTTP error page
    # as if it were the cover image.
    if cover_url:
        cover_response = session.get(cover_url)
        if cover_response.ok:
            book.set_cover("image.jpg", cover_response.content)

    chapter = epub.EpubHtml(
        title=title,
        file_name=f"{chapter_slug}.xhtml",
        lang="en",
        content=content,
    )
    book.add_item(chapter)

    for idx, image in enumerate(images):
        # Read through a context manager so the handle is closed before the
        # file is removed.
        with open(image, "rb") as image_file:
            image_content = image_file.read()
        book.add_item(
            epub.EpubImage(
                uid=f"image_{idx}",
                # Must stay equal to the path used in the chapter's <img src>.
                file_name=image,
                content=image_content,
            )
        )
        os.remove(image)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    nav_css = epub.EpubItem(
        uid="style_nav",
        file_name="style/nav.css",
        media_type="text/css",
        content="BODY {color: white;}",
    )
    book.add_item(nav_css)

    book.spine = ["nav", chapter]

    # Colons are stripped from the file name; a "/" in the title would be
    # read as a directory separator locally and on Dropbox, so it is replaced.
    epub_file_name = (
        f"Re: Zero Web Novel Arc {arc} - {title}.epub"
        .replace(":", "")
        .replace("/", "-")
    )
    epub.write_epub(epub_file_name, book, {})

    try:
        upload_to_dbox(epub_file_name)
    except dropbox.exceptions.ApiError as e:
        # Upload errors are deliberately tolerated so one failed upload does
        # not abort the run; anything else propagates with its traceback.
        if not isinstance(e.error, dropbox.files.UploadError):
            raise
def get_entry_content(url):
    """Fetch a page and return its ``.entry-content`` element.

    Args:
        url: Address of the page to download with the shared session.

    Returns:
        The result of ``select_one(".entry-content")`` on the parsed page,
        or None when the response status code is not 200.
    """
    response = session.get(url)
    if response.status_code == 200:
        markup = response.content.decode("utf-8")
        document = BeautifulSoup(markup, "html5lib")
        return document.select_one(".entry-content")
    return None
# Directory where chapter images are stored until they are embedded.
_IMAGE_DIR = "images"

# Only this many leading top-level nodes of a chapter are scanned for images
# to embed. NOTE(review): the limit of 10 is kept from the original script;
# presumably chapter art sits near the top - confirm before lifting it.
_IMAGE_SCAN_LIMIT = 10


def _find_cover_url(entry_content):
    """Return the src of the first image at or after the first <p>, or None."""
    node = entry_content.find("p")
    while node is not None:
        candidates = [node] if node.name == "img" else node.find_all("img")
        for candidate in candidates:
            src = candidate.get("src")
            if src:
                return src
        node = node.find_next_sibling()
    return None


def _localize_images(entry_content):
    """Download images in the leading nodes and point their src at local files.

    Returns the list of local file paths that were written.
    """
    os.makedirs(_IMAGE_DIR, exist_ok=True)
    local_paths = []
    for node in entry_content.contents[:_IMAGE_SCAN_LIMIT]:
        # Text nodes are str subclasses: calling .find("img") on them is
        # str.find and yields an int, not a tag, so they must be skipped.
        if isinstance(node, str):
            continue
        for image in node.find_all("img"):
            image_url = image.get("src")
            if not image_url:
                continue
            response = session.get(image_url)
            if not response.ok:
                # Leave the remote src untouched rather than embedding an
                # error page as an image.
                continue
            image_path = f"{_IMAGE_DIR}/local_image_{len(local_paths)}.jpg"
            with open(image_path, "wb") as image_file:
                image_file.write(response.content)
            local_paths.append(image_path)
            image["src"] = image_path
    return local_paths


def main():
    """Create and upload one EPUB for every chapter linked from the arc index.

    Chapters whose list item has no link, or whose page yields no entry
    content, are skipped (the latter with a printed message) instead of
    aborting the whole run.

    Raises:
        RuntimeError: If the arc index page yields no entry content.
    """
    index_content = get_entry_content(chapters_url)
    if index_content is None:
        raise RuntimeError(f"Could not read the chapter index at {chapters_url}")

    for item in index_content.select("li"):
        link = item.find("a", href=True)
        if link is None:
            continue
        chapter_url = link["href"]
        chapter_title = link.text

        entry_content = get_entry_content(chapter_url)
        if entry_content is None:
            print(f"Skipping {chapter_title!r}: no entry content at {chapter_url}")
            continue

        # Resolve the cover before the image sources are rewritten to local
        # paths, so the cover URL still points at the remote image. It is
        # None when the chapter has no usable image.
        cover_url = _find_cover_url(entry_content)
        images = _localize_images(entry_content)

        content = "".join(str(node) for node in entry_content.contents)
        create_epub(chapter_title, chapter_url, content, images, cover_url)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment