jeryjs · March 10, 2024 12:23
diff --git a/compress_epubs.py b/compress_epubs.py
 import argparse
 import os
 import shutil
 import zipfile
 from PIL import Image

 # Minimum EPUB size in MB; files smaller than this are skipped by
 # traverse_and_compress.
 FILE_THRESHOLD = 1.5
 # Quality value passed to the WEBP encoder when images are re-saved
 # (this is an encoder quality setting, not a size threshold).
 COMPRESSION_QUALITY = 80

 def compress_image(image_path, max_dimension=1080):
    """Downscale and re-encode one image file in place as WEBP.

    The image is resized (aspect ratio preserved) so that its longest side
    is at most ``max_dimension`` pixels, then written back to ``image_path``
    in WEBP format using ``COMPRESSION_QUALITY``. The file keeps its name
    and extension; only its contents change. The file size before and after
    is printed.

    Args:
        image_path: Path of the image file to overwrite.
        max_dimension: Largest allowed width or height, in pixels.
    """
    # Build the label outside the f-string: a backslash inside an f-string
    # expression is a SyntaxError before Python 3.12, and splitting on
    # os.sep also works for non-Windows paths.
    path_tail = os.path.normpath(image_path).split(os.sep)[-3:]
    print(f"\t\t\t{path_tail} ({os.path.getsize(image_path)})", end=' -> ')

    # The context manager closes the source image even if decoding,
    # resizing or saving raises.
    with Image.open(image_path) as source:
        width, height = source.size
        longest_side = max(width, height)
        if longest_side > max_dimension:
            ratio = float(max_dimension) / longest_side
            # Clamp to 1 px so extreme aspect ratios never yield a
            # zero-sized dimension, which resize() rejects.
            new_size = (max(1, int(ratio * width)), max(1, int(ratio * height)))
            result = source.resize(new_size, Image.LANCZOS)
        else:
            result = source
        result.save(image_path, "WEBP", quality=COMPRESSION_QUALITY)

    print(f"({os.path.getsize(image_path)})")

 def compress_epub(epub_path, output_dir):
    """Write a copy of an EPUB with recompressed images into ``output_dir``.

    The EPUB is unpacked into a temporary directory created inside
    ``output_dir``, every ``.png``/``.jpg``/``.jpeg`` file is passed to
    ``compress_image``, and the contents are zipped again. The result is
    stored as ``output_dir/<original file name>``, replacing any existing
    file of that name.

    Args:
        epub_path: Path of the source ``.epub`` file (it is only read).
        output_dir: Directory that receives the compressed EPUB; created
            if it does not exist.
    """
    import tempfile  # local import: keeps this block self-contained

    epub_name = os.path.basename(epub_path)
    print(f"\t\tCompressing: {epub_name}")
    os.makedirs(output_dir, exist_ok=True)

    # A uniquely named scratch directory cannot collide with a real output
    # sub-directory that shares the book's name, and it is removed even
    # when a step below raises.
    with tempfile.TemporaryDirectory(dir=output_dir) as work_dir:
        content_dir = os.path.join(work_dir, 'content')

        # zipfile does not care about the extension, so the EPUB can be
        # read directly without first copying it to a .zip file.
        with zipfile.ZipFile(epub_path, 'r') as zip_ref:
            zip_ref.extractall(content_dir)

        # Compress images
        print('  ->', end="  ")
        for root, _dirs, files in os.walk(content_dir):
            for file in files:
                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    compress_image(os.path.join(root, file))

        # Re-zip. The archive is built next to (not inside) the extracted
        # tree so it can never be added to itself.
        print("\t\tRe-Zipping: ", end='')
        archive_path = os.path.join(work_dir, epub_name)
        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as zipf:
            # EPUB OCF requires 'mimetype' to be the first entry and to be
            # stored uncompressed.
            mimetype_path = os.path.join(content_dir, 'mimetype')
            if os.path.isfile(mimetype_path):
                print('mimetype', end='  |  ')
                zipf.write(mimetype_path, arcname='mimetype', compress_type=zipfile.ZIP_STORED)

            for root, _dirs, files in os.walk(content_dir):
                for file in files:
                    file_path = os.path.join(root, file)
                    arcname = os.path.relpath(file_path, content_dir)
                    if arcname == 'mimetype':
                        continue  # already written first
                    print(file, end='  |  ')
                    zipf.write(file_path, arcname=arcname)

        # Publish the finished archive; the scratch directory lives inside
        # output_dir, so this is a same-filesystem replace.
        os.replace(archive_path, os.path.join(output_dir, epub_name))

 def traverse_and_compress(dir_path, output_dir):
    """Compress every sufficiently large EPUB found under ``dir_path``.

    Walks ``dir_path`` recursively. Each ``.epub`` file whose size is at
    least ``FILE_THRESHOLD`` MB is passed to ``compress_epub`` and written
    to the sub-directory of ``output_dir`` that mirrors its location
    relative to ``dir_path``. If ``output_dir`` is a sub-directory inside
    the walked tree it is not descended into.

    Args:
        dir_path: Root directory to search for EPUB files.
        output_dir: Root directory for the compressed copies.
    """
    output_key = os.path.normcase(os.path.abspath(output_dir))

    for root, dirs, files in os.walk(dir_path):
        # Prune in place so os.walk skips the output directory. With the
        # default arguments it sits inside the input tree, and a rerun
        # would otherwise recompress its own earlier results.
        dirs[:] = [
            name for name in dirs
            if os.path.normcase(os.path.abspath(os.path.join(root, name))) != output_key
        ]

        print(f"Traversing and compressing directory: {root}")
        for file in files:
            if not file.lower().endswith('.epub'):
                continue
            epub_path = os.path.join(root, file)
            file_size_mb = os.path.getsize(epub_path) / (1024 * 1024)
            if file_size_mb < FILE_THRESHOLD:
                continue
            print(f"\n\tProcessing: {file}")
            # Mirror the input directory layout below output_dir.
            epub_output_dir = os.path.join(output_dir, os.path.relpath(root, dir_path))
            os.makedirs(epub_output_dir, exist_ok=True)
            compress_epub(epub_path, epub_output_dir)

 if __name__ == "__main__":
    # Both positionals are optional, so the script also runs with no
    # arguments: (name, default, help text).
    positionals = (
        ('dir', '.', 'Directory to compress files from.'),
        ('output', './Compressed_Epubs', 'Directory to output compressed files to.'),
    )

    cli = argparse.ArgumentParser(description='Compress epub files in a directory.')
    for arg_name, fallback, help_text in positionals:
        cli.add_argument(arg_name, nargs='?', default=fallback, help=help_text)
    options = cli.parse_args()

    # Hand the input and output directories to the directory walker.
    traverse_and_compress(options.dir, options.output)
	import argparse
	import os
	import shutil
	import zipfile
	from PIL import Image

	# Minimum EPUB size in MB; compared against each file's size before it
	# is processed.
	FILE_THRESHOLD = 1.5
	# Quality value passed to the WEBP encoder when images are re-saved
	# (this is an encoder quality setting, not a size threshold).
	COMPRESSION_QUALITY = 80

	def compress_image(image_path, max_dimension=1080):
	    """Downscale and re-encode one image file in place as WEBP.

	    The image is resized (aspect ratio preserved) so that its longest
	    side is at most ``max_dimension`` pixels, then written back to
	    ``image_path`` in WEBP format using ``COMPRESSION_QUALITY``. The file
	    keeps its name and extension; only its contents change. The file
	    size before and after is printed.

	    Args:
	        image_path: Path of the image file to overwrite.
	        max_dimension: Largest allowed width or height, in pixels.
	    """
	    # Build the label outside the f-string: a backslash inside an
	    # f-string expression is a SyntaxError before Python 3.12, and
	    # splitting on os.sep also works for non-Windows paths.
	    path_tail = os.path.normpath(image_path).split(os.sep)[-3:]
	    print(f"\t\t\t{path_tail} ({os.path.getsize(image_path)})", end=' -> ')

	    # The context manager closes the source image even if decoding,
	    # resizing or saving raises.
	    with Image.open(image_path) as source:
	        width, height = source.size
	        longest_side = max(width, height)
	        if longest_side > max_dimension:
	            ratio = float(max_dimension) / longest_side
	            # Clamp to 1 px so extreme aspect ratios never yield a
	            # zero-sized dimension, which resize() rejects.
	            new_size = (max(1, int(ratio * width)), max(1, int(ratio * height)))
	            result = source.resize(new_size, Image.LANCZOS)
	        else:
	            result = source
	        result.save(image_path, "WEBP", quality=COMPRESSION_QUALITY)

	    print(f"({os.path.getsize(image_path)})")

	def compress_epub(epub_path, output_dir):
	    """Write a copy of an EPUB with recompressed images into ``output_dir``.

	    The EPUB is unpacked into a temporary directory created inside
	    ``output_dir``, every ``.png``/``.jpg``/``.jpeg`` file is passed to
	    ``compress_image``, and the contents are zipped again. The result is
	    stored as ``output_dir/<original file name>``, replacing any existing
	    file of that name.

	    Args:
	        epub_path: Path of the source ``.epub`` file (it is only read).
	        output_dir: Directory that receives the compressed EPUB; created
	            if it does not exist.
	    """
	    import tempfile  # local import: keeps this block self-contained

	    epub_name = os.path.basename(epub_path)
	    print(f"\t\tCompressing: {epub_name}")
	    os.makedirs(output_dir, exist_ok=True)

	    # A uniquely named scratch directory cannot collide with a real
	    # output sub-directory that shares the book's name, and it is removed
	    # even when a step below raises.
	    with tempfile.TemporaryDirectory(dir=output_dir) as work_dir:
	        content_dir = os.path.join(work_dir, 'content')

	        # zipfile does not care about the extension, so the EPUB can be
	        # read directly without first copying it to a .zip file.
	        with zipfile.ZipFile(epub_path, 'r') as zip_ref:
	            zip_ref.extractall(content_dir)

	        # Compress images
	        print('  ->', end="  ")
	        for root, _dirs, files in os.walk(content_dir):
	            for file in files:
	                if file.lower().endswith(('.png', '.jpg', '.jpeg')):
	                    compress_image(os.path.join(root, file))

	        # Re-zip. The archive is built next to (not inside) the extracted
	        # tree so it can never be added to itself.
	        print("\t\tRe-Zipping: ", end='')
	        archive_path = os.path.join(work_dir, epub_name)
	        with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as zipf:
	            # EPUB OCF requires 'mimetype' to be the first entry and to
	            # be stored uncompressed.
	            mimetype_path = os.path.join(content_dir, 'mimetype')
	            if os.path.isfile(mimetype_path):
	                print('mimetype', end='  |  ')
	                zipf.write(mimetype_path, arcname='mimetype', compress_type=zipfile.ZIP_STORED)

	            for root, _dirs, files in os.walk(content_dir):
	                for file in files:
	                    file_path = os.path.join(root, file)
	                    arcname = os.path.relpath(file_path, content_dir)
	                    if arcname == 'mimetype':
	                        continue  # already written first
	                    print(file, end='  |  ')
	                    zipf.write(file_path, arcname=arcname)

	        # Publish the finished archive; the scratch directory lives
	        # inside output_dir, so this is a same-filesystem replace.
	        os.replace(archive_path, os.path.join(output_dir, epub_name))

	def traverse_and_compress(dir_path, output_dir):
	    """Compress every sufficiently large EPUB found under ``dir_path``.

	    Walks ``dir_path`` recursively. Each ``.epub`` file whose size is at
	    least ``FILE_THRESHOLD`` MB is passed to ``compress_epub`` and written
	    to the sub-directory of ``output_dir`` that mirrors its location
	    relative to ``dir_path``. If ``output_dir`` is a sub-directory inside
	    the walked tree it is not descended into.

	    Args:
	        dir_path: Root directory to search for EPUB files.
	        output_dir: Root directory for the compressed copies.
	    """
	    output_key = os.path.normcase(os.path.abspath(output_dir))

	    for root, dirs, files in os.walk(dir_path):
	        # Prune in place so os.walk skips the output directory. With the
	        # default arguments it sits inside the input tree, and a rerun
	        # would otherwise recompress its own earlier results.
	        dirs[:] = [
	            name for name in dirs
	            if os.path.normcase(os.path.abspath(os.path.join(root, name))) != output_key
	        ]

	        print(f"Traversing and compressing directory: {root}")
	        for file in files:
	            if not file.lower().endswith('.epub'):
	                continue
	            epub_path = os.path.join(root, file)
	            file_size_mb = os.path.getsize(epub_path) / (1024 * 1024)
	            if file_size_mb < FILE_THRESHOLD:
	                continue
	            print(f"\n\tProcessing: {file}")
	            # Mirror the input directory layout below output_dir.
	            epub_output_dir = os.path.join(output_dir, os.path.relpath(root, dir_path))
	            os.makedirs(epub_output_dir, exist_ok=True)
	            compress_epub(epub_path, epub_output_dir)

	if __name__ == "__main__":
	    # Command-line entry point. Both positionals are optional, so the
	    # script also runs with no arguments.
	    parser = argparse.ArgumentParser(description='Compress epub files in a directory.')
	    parser.add_argument('dir', nargs='?', default='.', help='Directory to compress files from.')
	    parser.add_argument('output', nargs='?', default='./Compressed_Epubs', help='Directory to output compressed files to.')
	    args = parser.parse_args()

	    # Call the function with the directory and output directory
	    traverse_and_compress(args.dir, args.output)