3nws · August 16, 2024 21:31
diff --git a/wct.py b/wct.py
 import html5lib  # type: ignore

 import requests
 import os
 import dropbox

 from ebooklib import epub
 from bs4 import BeautifulSoup
 from config import app_folder, app_key, app_secret, oauth2_refresh_token

 # Shared HTTP session used for every request made by this script.
 session = requests.session()

 # Dropbox client built from the credentials imported from config.
 dbx = dropbox.Dropbox(
    app_key=app_key,
    app_secret=app_secret,
    oauth2_refresh_token=oauth2_refresh_token,
 )

 # Make sure the destination folder exists before anything is uploaded:
 # if the metadata lookup fails, the folder is created.
 try:
    dbx.files_get_metadata(app_folder)
 except dropbox.exceptions.ApiError as e:
    # isinstance() instead of an exact type() comparison.
    # NOTE(review): any other ApiError payload is still ignored here, as before.
    if isinstance(e.error, dropbox.files.GetMetadataError):
        dbx.files_create_folder(app_folder)

 # Arc number; interpolated into the index URL below and into the EPUB file name.
 arc = 9

 # Page whose list items main() scans to find the newest chapter.
 chapters_url = f"https://witchculttranslation.com/arc-{arc}/"

 def upload_to_dbox(epub_file_name):
    """Upload the local file `epub_file_name` to the Dropbox app folder.

    The file is stored under `app_folder` with the same name it has locally.
    """
    with open(epub_file_name, "rb") as epub_file:
        payload = epub_file.read()
        destination = f"{app_folder}/{epub_file_name}"
        dbx.files_upload(payload, destination)

 def create_epub(title, url, content, images, cover_url=None):
    """Build a single-chapter EPUB, write it to disk and upload it.

    Args:
        title: Chapter title; used as the book title, the chapter title and
            part of the output file name.
        url: Chapter URL; its last non-empty path segment becomes the book
            identifier and the name of the chapter's XHTML file.
        content: Chapter body as an HTML string.
        images: Paths of local image files. Each one is embedded under its own
            path as the in-book file name and then removed from disk.
        cover_url: Optional URL of the cover image. When it is missing or the
            download does not return HTTP 200, the book is built without a
            cover.
    """
    book = epub.EpubBook()

    chapter_slug = [segment for segment in url.split("/") if segment][-1]

    book.set_identifier(chapter_slug)
    book.set_title(title)
    book.set_language("en")

    # cover_url defaults to None, so only fetch a cover when one was given,
    # and never embed an error page as the cover image.
    if cover_url:
        cover = session.get(cover_url)
        if cover.status_code == 200:
            book.set_cover("image.jpg", cover.content)

    chapter = epub.EpubHtml(
        title=title,
        file_name=f"{chapter_slug}.xhtml",
        lang="en",
        content=content,
    )
    book.add_item(chapter)

    for idx, image in enumerate(images):
        # Context manager so the handle is closed before the file is deleted.
        with open(image, "rb") as image_file:
            image_content = image_file.read()
        img = epub.EpubImage(
            uid=f"image_{idx}",
            file_name=image,
            content=image_content,
        )
        book.add_item(img)
        os.remove(image)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    nav_css = epub.EpubItem(
        uid="style_nav",
        file_name="style/nav.css",
        media_type="text/css",
        content="BODY {color: white;}",
    )
    book.add_item(nav_css)
    book.spine = ["nav", chapter]

    # Colons are stripped from the whole file name (including the "Re:" prefix).
    epub_file_name = f"Re: Zero Web Novel Arc {arc} - {title}.epub".replace(":", "")
    epub.write_epub(epub_file_name, book, {})

    upload_to_dbox(epub_file_name)
    
 def get_entry_content(url):
    """Fetch `url` and return the first element matching ``.entry-content``.

    Returns None when the response status is not 200.
    """
    response = session.get(url)
    if response.status_code == 200:
        markup = response.content.decode("utf-8")
        document = BeautifulSoup(markup, "html5lib")
        return document.select_one(".entry-content")
    return None

 def _find_cover_url(entry_content):
    """Return the ``src`` of the element main() uses as the cover, or None.

    Takes the first child element of the first <p>. If that paragraph has no
    child element, its following siblings are walked until one is a <p> or
    contains an <img>, and that sibling's first child element is used instead.
    """
    first_paragraph = entry_content.find("p")
    if first_paragraph is None:
        return None

    candidate = first_paragraph.findChild()
    if candidate is None:
        sibling = first_paragraph.find_next_sibling()
        # Stop at the end of the sibling list instead of dereferencing None.
        while (
            sibling is not None
            and sibling.name != "p"
            and sibling.findChild("img") is None
        ):
            sibling = sibling.find_next_sibling()
        if sibling is not None:
            candidate = sibling.findChild()

    return candidate.get("src") if candidate is not None else None


 def _download_images(entry_content):
    """Save the first <img> of each of the first ten content nodes locally.

    Every downloaded image has its ``src`` rewritten to the local path, so the
    chapter HTML refers to the local copy.

    Returns:
        List of the local file paths that were written.
    """
    images = []
    for idx, node in enumerate(entry_content.contents[:10]):
        # Text nodes are str subclasses: node.find("img") would be str.find
        # and return an int rather than a tag, so skip them.
        if isinstance(node, str):
            continue
        image = node.find("img")
        if image is None:
            continue
        image_url = image.get("src")
        if not image_url:
            continue
        response = session.get(image_url)
        if response.status_code != 200:
            # Leave the original src in place when the download fails.
            continue

        image_path = f"images/local_image_{idx}.jpg"
        os.makedirs(os.path.dirname(image_path), exist_ok=True)
        with open(image_path, "wb") as image_file:
            image_file.write(response.content)

        images.append(image_path)
        image["src"] = image_path
    return images


 def main():
    """Publish the newest chapter as an EPUB unless it was already processed.

    The URL of the last processed chapter is kept in ./latest_chapter.txt. It
    is only updated after create_epub() has returned, so a run that fails
    part-way is retried the next time the script runs.

    Raises:
        RuntimeError: If the index page or the chapter page cannot be loaded,
            or the index contains no chapter link.
    """
    index_content = get_entry_content(chapters_url)
    if index_content is None:
        raise RuntimeError(f"Could not load chapter index: {chapters_url}")

    chapters = index_content.select("li")
    if not chapters:
        raise RuntimeError(f"No chapters listed at: {chapters_url}")

    # The last list item is treated as the newest chapter.
    a_tag = chapters[-1].findChild()
    chapter_url = a_tag.get("href") if a_tag is not None else None
    if not chapter_url:
        raise RuntimeError(f"No chapter link found at: {chapters_url}")
    chapter_title = a_tag.text

    filename = './latest_chapter.txt'
    try:
        with open(filename) as tracker:
            # strip() so a trailing newline cannot make equal URLs differ.
            latest_chapter_url = tracker.readline().strip()
    except FileNotFoundError:
        latest_chapter_url = ""

    if latest_chapter_url == chapter_url:
        return

    entry_content = get_entry_content(chapter_url)
    if entry_content is None:
        raise RuntimeError(f"Could not load chapter page: {chapter_url}")

    # The cover is resolved before image src attributes are rewritten.
    cover_url = _find_cover_url(entry_content)
    images = _download_images(entry_content)

    content = "".join(str(tag) for tag in entry_content.contents)
    create_epub(chapter_title, chapter_url, content, images, cover_url)

    # Record the chapter only once it has been built and uploaded.
    with open(filename, 'w') as tracker:
        tracker.write(chapter_url)

 # Run the scraper only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
	import html5lib # type: ignore

	import requests
	import os
	import dropbox

	from ebooklib import epub
	from bs4 import BeautifulSoup
	from config import app_folder, app_key, app_secret, oauth2_refresh_token

	# Shared HTTP session used for every request made by this script.
	session = requests.session()

	# Dropbox client built from the credentials imported from config.
	dbx = dropbox.Dropbox(
	    app_key=app_key,
	    app_secret=app_secret,
	    oauth2_refresh_token=oauth2_refresh_token,
	)

	# Make sure the destination folder exists before anything is uploaded:
	# if the metadata lookup fails, the folder is created.
	try:
	    dbx.files_get_metadata(app_folder)
	except dropbox.exceptions.ApiError as e:
	    # isinstance() instead of an exact type() comparison.
	    # NOTE(review): any other ApiError payload is still ignored here, as before.
	    if isinstance(e.error, dropbox.files.GetMetadataError):
	        dbx.files_create_folder(app_folder)

	# Arc number; interpolated into the index URL below and into the EPUB file name.
	arc = 9

	# Page whose list items main() scans to find the newest chapter.
	chapters_url = f"https://witchculttranslation.com/arc-{arc}/"

	def upload_to_dbox(epub_file_name):
	    """Upload the local file `epub_file_name` to the Dropbox app folder.

	    The file is stored under `app_folder` with the same name it has locally.
	    """
	    with open(epub_file_name, "rb") as epub_file:
	        payload = epub_file.read()
	        destination = f"{app_folder}/{epub_file_name}"
	        dbx.files_upload(payload, destination)

	def create_epub(title, url, content, images, cover_url=None):
	    """Build a single-chapter EPUB, write it to disk and upload it.

	    Args:
	        title: Chapter title; used as the book title, the chapter title and
	            part of the output file name.
	        url: Chapter URL; its last non-empty path segment becomes the book
	            identifier and the name of the chapter's XHTML file.
	        content: Chapter body as an HTML string.
	        images: Paths of local image files. Each one is embedded under its own
	            path as the in-book file name and then removed from disk.
	        cover_url: Optional URL of the cover image. When it is missing or the
	            download does not return HTTP 200, the book is built without a
	            cover.
	    """
	    book = epub.EpubBook()

	    chapter_slug = [segment for segment in url.split("/") if segment][-1]

	    book.set_identifier(chapter_slug)
	    book.set_title(title)
	    book.set_language("en")

	    # cover_url defaults to None, so only fetch a cover when one was given,
	    # and never embed an error page as the cover image.
	    if cover_url:
	        cover = session.get(cover_url)
	        if cover.status_code == 200:
	            book.set_cover("image.jpg", cover.content)

	    chapter = epub.EpubHtml(
	        title=title,
	        file_name=f"{chapter_slug}.xhtml",
	        lang="en",
	        content=content,
	    )
	    book.add_item(chapter)

	    for idx, image in enumerate(images):
	        # Context manager so the handle is closed before the file is deleted.
	        with open(image, "rb") as image_file:
	            image_content = image_file.read()
	        img = epub.EpubImage(
	            uid=f"image_{idx}",
	            file_name=image,
	            content=image_content,
	        )
	        book.add_item(img)
	        os.remove(image)

	    book.add_item(epub.EpubNcx())
	    book.add_item(epub.EpubNav())

	    nav_css = epub.EpubItem(
	        uid="style_nav",
	        file_name="style/nav.css",
	        media_type="text/css",
	        content="BODY {color: white;}",
	    )
	    book.add_item(nav_css)
	    book.spine = ["nav", chapter]

	    # Colons are stripped from the whole file name (including the "Re:" prefix).
	    epub_file_name = f"Re: Zero Web Novel Arc {arc} - {title}.epub".replace(":", "")
	    epub.write_epub(epub_file_name, book, {})

	    upload_to_dbox(epub_file_name)

	def get_entry_content(url):
	    """Fetch `url` and return the first element matching ``.entry-content``.

	    Returns None when the response status is not 200.
	    """
	    response = session.get(url)
	    if response.status_code == 200:
	        markup = response.content.decode("utf-8")
	        document = BeautifulSoup(markup, "html5lib")
	        return document.select_one(".entry-content")
	    return None

	def _find_cover_url(entry_content):
	    """Return the ``src`` of the element main() uses as the cover, or None.

	    Takes the first child element of the first <p>. If that paragraph has no
	    child element, its following siblings are walked until one is a <p> or
	    contains an <img>, and that sibling's first child element is used instead.
	    """
	    first_paragraph = entry_content.find("p")
	    if first_paragraph is None:
	        return None

	    candidate = first_paragraph.findChild()
	    if candidate is None:
	        sibling = first_paragraph.find_next_sibling()
	        # Stop at the end of the sibling list instead of dereferencing None.
	        while (
	            sibling is not None
	            and sibling.name != "p"
	            and sibling.findChild("img") is None
	        ):
	            sibling = sibling.find_next_sibling()
	        if sibling is not None:
	            candidate = sibling.findChild()

	    return candidate.get("src") if candidate is not None else None


	def _download_images(entry_content):
	    """Save the first <img> of each of the first ten content nodes locally.

	    Every downloaded image has its ``src`` rewritten to the local path, so the
	    chapter HTML refers to the local copy.

	    Returns:
	        List of the local file paths that were written.
	    """
	    images = []
	    for idx, node in enumerate(entry_content.contents[:10]):
	        # Text nodes are str subclasses: node.find("img") would be str.find
	        # and return an int rather than a tag, so skip them.
	        if isinstance(node, str):
	            continue
	        image = node.find("img")
	        if image is None:
	            continue
	        image_url = image.get("src")
	        if not image_url:
	            continue
	        response = session.get(image_url)
	        if response.status_code != 200:
	            # Leave the original src in place when the download fails.
	            continue

	        image_path = f"images/local_image_{idx}.jpg"
	        os.makedirs(os.path.dirname(image_path), exist_ok=True)
	        with open(image_path, "wb") as image_file:
	            image_file.write(response.content)

	        images.append(image_path)
	        image["src"] = image_path
	    return images


	def main():
	    """Publish the newest chapter as an EPUB unless it was already processed.

	    The URL of the last processed chapter is kept in ./latest_chapter.txt. It
	    is only updated after create_epub() has returned, so a run that fails
	    part-way is retried the next time the script runs.

	    Raises:
	        RuntimeError: If the index page or the chapter page cannot be loaded,
	            or the index contains no chapter link.
	    """
	    index_content = get_entry_content(chapters_url)
	    if index_content is None:
	        raise RuntimeError(f"Could not load chapter index: {chapters_url}")

	    chapters = index_content.select("li")
	    if not chapters:
	        raise RuntimeError(f"No chapters listed at: {chapters_url}")

	    # The last list item is treated as the newest chapter.
	    a_tag = chapters[-1].findChild()
	    chapter_url = a_tag.get("href") if a_tag is not None else None
	    if not chapter_url:
	        raise RuntimeError(f"No chapter link found at: {chapters_url}")
	    chapter_title = a_tag.text

	    filename = './latest_chapter.txt'
	    try:
	        with open(filename) as tracker:
	            # strip() so a trailing newline cannot make equal URLs differ.
	            latest_chapter_url = tracker.readline().strip()
	    except FileNotFoundError:
	        latest_chapter_url = ""

	    if latest_chapter_url == chapter_url:
	        return

	    entry_content = get_entry_content(chapter_url)
	    if entry_content is None:
	        raise RuntimeError(f"Could not load chapter page: {chapter_url}")

	    # The cover is resolved before image src attributes are rewritten.
	    cover_url = _find_cover_url(entry_content)
	    images = _download_images(entry_content)

	    content = "".join(str(tag) for tag in entry_content.contents)
	    create_epub(chapter_title, chapter_url, content, images, cover_url)

	    # Record the chapter only once it has been built and uploaded.
	    with open(filename, 'w') as tracker:
	        tracker.write(chapter_url)

	# Run the scraper only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()