Created
March 2, 2020 13:43
-
-
Save cyxx/e813961f6993d8784900d08aa4f49e2d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
#
# Compress .ISO to .CSO (v1), similar to https://sourceforge.net/projects/ciso/
#
# - compression level is set to 9 (maximum)
# - each block is compressed using the 3 zlib strategies (default, filtered, huffman), the smallest is kept
#
from __future__ import print_function | |
import multiprocessing | |
import os | |
import sys | |
import struct | |
import zlib | |
MODE_DECOMPRESS = 0
MODE_COMPRESS = 1
MODE_INFO = 2
HEADER_SIZE = 24
VERSION = 1
BLOCK_SIZE = 2048
WBITS = -15  # negative window bits: raw deflate stream, no zlib header/trailer

CISO_MAGIC = b'CISO'
# 24-byte little-endian header: magic, header size, uncompressed size,
# block size, version, align, 2 padding bytes.
HEADER_STRUCT = struct.Struct('<4sIQIBB2x')
# Index entries are 32-bit: the top bit marks a block stored as-is, the lower
# 31 bits hold the block's file offset (shifted right by `align`).
INDEX_RAW_FLAG = 0x80000000
INDEX_OFFSET_MASK = 0x7FFFFFFF
ZLIB_STRATEGIES = (zlib.Z_DEFAULT_STRATEGY, zlib.Z_FILTERED, zlib.Z_HUFFMAN_ONLY)


def _parse_header(header):
    """Return (length, uncompressed, block, version, align), or None if
    `header` is not a complete CISO header."""
    if len(header) < HEADER_SIZE or header[:4] != CISO_MAGIC:
        return None
    # Unpacking through struct yields ints on both Python 2 and Python 3
    # (indexing bytes and calling ord() only works on Python 2).
    return HEADER_STRUCT.unpack(header[:HEADER_SIZE])[1:]


def _build_header(uncompressed, block):
    """Return the 24-byte CISO header as bytes (version VERSION, align 0)."""
    return HEADER_STRUCT.pack(CISO_MAGIC, HEADER_SIZE, uncompressed, block, VERSION, 0)


def _show_progress(i, count):
    """Print the current block number on a single, self-overwriting line."""
    print('block %d/%d' % (i, count), end='\r')
    sys.stdout.flush()


def _compress_block(data):
    """Deflate `data` with every zlib strategy and return the smallest result.

    `data` itself is returned when no strategy produces something smaller.
    """
    best = data
    for strategy in ZLIB_STRATEGIES:
        obj = zlib.compressobj(9, zlib.DEFLATED, WBITS, 9, strategy)
        candidate = obj.compress(data) + obj.flush(zlib.Z_FINISH)
        if len(candidate) < len(best):
            best = candidate
    return best


def _decompress(src, dst, uncompressed, block, align):
    """Expand the CSO stream `src` into `dst`.

    `src` must be positioned right after the header, where the index table
    starts. Raises ValueError on a truncated or inconsistent file.
    """
    count = uncompressed // block
    # The table holds count + 1 entries; the last one is the end offset of the
    # final block and is required to compute that block's compressed size.
    table_size = 4 * (count + 1)
    raw_table = src.read(table_size)
    if len(raw_table) != table_size:
        raise ValueError('truncated index table')
    indexes = struct.unpack('<%dI' % (count + 1), raw_table)
    for i in range(count):
        entry = indexes[i]
        pos = (entry & INDEX_OFFSET_MASK) << align
        src.seek(pos)
        if entry & INDEX_RAW_FLAG:
            # block stored uncompressed
            data = src.read(block)
        else:
            # Mask the flag of the next entry too, otherwise a following raw
            # block would inflate the computed size by 2 GiB.
            end = (indexes[i + 1] & INDEX_OFFSET_MASK) << align
            if end < pos:
                raise ValueError('index table is not monotonic at block %d' % i)
            data = zlib.decompress(src.read(end - pos), WBITS)
        if len(data) != block:
            raise ValueError('block %d has unexpected size %d' % (i, len(data)))
        dst.write(data)
        _show_progress(i, count)


def _compress(src, dst, uncompressed, block):
    """Compress the ISO stream `src` into `dst` and return the output size.

    `dst` must be seekable: space for the header and index table is reserved
    first and filled in once all block offsets are known. Raises ValueError on
    a short read or when an offset no longer fits in a 31-bit index entry.
    """
    count = uncompressed // block
    offset = HEADER_SIZE + 4 * (count + 1)
    # placeholder for the header and index table, rewritten below
    dst.write(b'\x00' * offset)
    indexes = []
    for i in range(count):
        data = src.read(block)
        if len(data) != block:
            raise ValueError('short read at block %d' % i)
        cdat = _compress_block(data)
        entry = offset
        if len(cdat) == len(data):
            # nothing was smaller than the input: stored uncompressed
            entry |= INDEX_RAW_FLAG
        indexes.append(entry)
        dst.write(cdat)
        offset += len(cdat)
        if offset > INDEX_OFFSET_MASK:
            # align is always 0 here, so offsets are limited to 31 bits
            raise ValueError('output too large for 31-bit CSO index offsets')
        _show_progress(i, count)
    indexes.append(offset)
    # fixup/rewrite header and indexes table
    dst.seek(0)
    dst.write(_build_header(uncompressed, block))
    dst.write(struct.pack('<%dI' % len(indexes), *indexes))
    return offset


def main(argv=None):
    """Convert the file named by argv[1]: CSO input becomes .iso, anything
    else is treated as an ISO image and becomes .cso.

    `argv` defaults to sys.argv. Exits with status -1 when the argument is
    missing, the output file already exists, or the input size is not a
    multiple of the block size.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print('usage: %s <file.iso|file.cso>' % argv[0])
        sys.exit(-1)
    fn = argv[1]
    with open(fn, 'rb') as f:
        # detect input file format
        info = _parse_header(f.read(HEADER_SIZE))
        if info is not None:
            length, uncompressed, block, version, align = info
            print('CISO header:%d uncompressed:%d block:%d version:%d align:%d' % (length, uncompressed, block, version, align))
            mode = MODE_DECOMPRESS
            ext = '.iso'
        else:
            f.seek(0)
            uncompressed = os.stat(fn).st_size
            block = BLOCK_SIZE
            align = 0
            print('ISO size:%d' % uncompressed)
            mode = MODE_COMPRESS
            ext = '.cso'
        # do not overwrite if the output file already exists
        out_fn = os.path.splitext(fn)[0] + ext
        if os.path.exists(out_fn):
            print("ERROR: output file '%s' exists" % out_fn)
            sys.exit(-1)
        # file size must be aligned to a block size (explicit check instead of
        # assert, which is stripped when Python runs with -O)
        if block == 0 or uncompressed % block != 0:
            print('ERROR: size %d is not a multiple of block size %d' % (uncompressed, block))
            sys.exit(-1)
        with open(out_fn, 'wb') as of:
            if mode == MODE_DECOMPRESS:
                _decompress(f, of, uncompressed, block, align)
            else:
                offset = _compress(f, of, uncompressed, block)
                # an empty input has no meaningful ratio; avoid dividing by zero
                ratio = (uncompressed - offset) * 100. / uncompressed if uncompressed else 0.
                print('size:%d ratio:%f' % (offset, ratio))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment