Skip to content

Instantly share code, notes, and snippets.

@jeryjs
Last active March 10, 2024 12:23
Show Gist options
  • Save jeryjs/256e06db532c81381565143d00c97baa to your computer and use it in GitHub Desktop.
Save jeryjs/256e06db532c81381565143d00c97baa to your computer and use it in GitHub Desktop.
Compress EPUB files in batch. The script works recursively and preserves the input directory's structure in the output directory. Usage: py ./compress_epubs.py 'path/to/epubs/dir' 'path/to/output/dir'
import argparse
import os
import shutil
import tempfile
import zipfile

from PIL import Image
# Minimum EPUB size, in MB, for a file to be processed; smaller files are skipped.
FILE_THRESHOLD = 1.5
# Quality value handed to the WebP encoder when an image is re-saved.
COMPRESSION_QUALITY = 80
def compress_image(image_path):
    """Shrink and re-encode a single image file in place.

    The image is scaled down (aspect ratio preserved) so that its longest
    side is at most 1080 px, then written back to ``image_path`` as WebP at
    ``COMPRESSION_QUALITY``.  The file keeps its original name, so after this
    call the extension no longer reflects the actual encoding.  The size in
    bytes before and after is printed as a single progress line.

    Args:
        image_path: Path of the image file to overwrite.
    """
    # Show only the last three path components to keep the log readable.
    # Splitting on os.sep instead of a literal backslash works on every
    # platform, and computing it outside the f-string avoids a backslash
    # inside a replacement field (a SyntaxError before Python 3.12).
    tail = os.path.normpath(image_path).split(os.sep)[-3:]
    print(f"\t\t\t{tail} ({os.path.getsize(image_path)})", end=' -> ')

    # The context manager guarantees the source file handle is released.
    with Image.open(image_path) as source:
        width, height = source.size
        max_dim = max(width, height)
        result = source
        if max_dim > 1080:
            ratio = 1080.0 / max_dim
            # Clamp to 1 px so an extreme aspect ratio cannot round a side
            # down to zero, which resize() rejects.
            new_size = (max(1, int(ratio * width)), max(1, int(ratio * height)))
            result = source.resize(new_size, Image.LANCZOS)
        result.save(image_path, "WEBP", quality=COMPRESSION_QUALITY)

    print(f"({os.path.getsize(image_path)})")
def compress_epub(epub_path, output_dir):
    """Recompress the images inside one EPUB and write the result to ``output_dir``.

    The EPUB is unpacked into a private scratch directory, every ``.png``,
    ``.jpg`` and ``.jpeg`` file inside it is passed to ``compress_image``,
    and the tree is zipped again as ``<output_dir>/<original file name>``.
    The source file is only read, unless it is the same path as the target.

    Args:
        epub_path: Path of the EPUB file to process.
        output_dir: Directory that receives the rebuilt EPUB; created if
            it does not exist.

    Raises:
        zipfile.BadZipFile: If ``epub_path`` is not a valid ZIP archive.
        OSError: On any file-system failure while reading or writing.
    """
    epub_name = os.path.basename(epub_path)
    print(f"\t\tCompressing: {epub_name}")

    os.makedirs(output_dir, exist_ok=True)
    target_path = os.path.join(output_dir, epub_name)

    # A uniquely named scratch directory cannot collide with a real
    # sub-directory of the output tree, and it is removed even when an
    # error interrupts processing.  Creating it inside output_dir keeps the
    # final os.replace() on a single file system.
    with tempfile.TemporaryDirectory(dir=output_dir) as work_dir:
        extract_dir = os.path.join(work_dir, "extracted")
        rebuilt_path = os.path.join(work_dir, "rebuilt.zip")

        # An EPUB is a ZIP archive, so it can be opened directly.
        with zipfile.ZipFile(epub_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

        # Compress images
        print(' ->', end=" ")
        for root, _dirs, files in os.walk(extract_dir):
            for file in files:
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    compress_image(os.path.join(root, file))

        # Re-zip.  The archive is built outside extract_dir, so it can never
        # end up containing itself.
        print("\t\tRe-Zipping: ", end='')
        mimetype_path = os.path.join(extract_dir, 'mimetype')
        with zipfile.ZipFile(rebuilt_path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as zipf:
            # The EPUB container format requires 'mimetype' to be the first
            # entry in the archive and to be stored uncompressed.
            if os.path.isfile(mimetype_path):
                print('mimetype', end=' | ')
                zipf.write(mimetype_path, arcname='mimetype', compress_type=zipfile.ZIP_STORED)
            for root, _dirs, files in os.walk(extract_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    if file_path == mimetype_path:
                        continue  # already written as the first entry
                    print(file, end=' | ')
                    zipf.write(file_path, arcname=os.path.relpath(file_path, extract_dir))

        # Publish the finished archive only once it is complete.
        os.replace(rebuilt_path, target_path)
def traverse_and_compress(dir_path, output_dir):
    """Walk ``dir_path`` and compress every sufficiently large EPUB found.

    Each ``.epub`` file whose size is at least ``FILE_THRESHOLD`` MB is
    handed to ``compress_epub``; the result is written under ``output_dir``
    at the same relative location the source has under ``dir_path``.
    Smaller EPUBs and all other files are ignored.

    Args:
        dir_path: Root directory to search recursively.
        output_dir: Root directory for the compressed copies.
    """
    # Resolve once so the comparison below does not depend on how the two
    # paths were spelled (relative vs. absolute, letter case on Windows).
    output_abs = os.path.normcase(os.path.abspath(output_dir))

    for root, dirs, files in os.walk(dir_path):
        # Prune in place so os.walk never descends into the output tree.
        # With the default arguments the output directory lives inside the
        # input directory, and walking it would recompress earlier results.
        dirs[:] = [
            d for d in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, d))) != output_abs
        ]

        print(f"Traversing and compressing directory: {root}")
        for file in files:
            if not file.lower().endswith('.epub'):
                continue
            epub_path = os.path.join(root, file)
            file_size_mb = os.path.getsize(epub_path) / (1024 * 1024)
            if file_size_mb < FILE_THRESHOLD:
                continue
            print(f"\n\tProcessing: {file}")
            # Mirror the source's relative location inside the output tree.
            epub_output_dir = os.path.normpath(
                os.path.join(output_dir, os.path.relpath(root, dir_path))
            )
            os.makedirs(epub_output_dir, exist_ok=True)
            compress_epub(epub_path, epub_output_dir)
if __name__ == "__main__":
    # Command-line entry point: both positional arguments are optional and
    # fall back to the current directory and ./Compressed_Epubs.
    cli = argparse.ArgumentParser(description='Compress epub files in a directory.')
    cli.add_argument(
        'dir',
        nargs='?',
        default='.',
        help='Directory to compress files from.',
    )
    cli.add_argument(
        'output',
        nargs='?',
        default='./Compressed_Epubs',
        help='Directory to output compressed files to.',
    )
    options = cli.parse_args()

    # Hand the resolved source and destination directories to the walker.
    traverse_and_compress(options.dir, options.output)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment