Created
June 22, 2020 03:39
-
-
Save Luavis/b3095217bdc5f8d02ab7c5a7546480a1 to your computer and use it in GitHub Desktop.
Java decompiler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import struct | |
import re | |
from io import BytesIO | |
import sys | |
from code_attribute import read_code_attribute | |
# Big-endian decoders for the fixed-width primitives used by the class-file
# format. Each takes a bytes object of exactly the width its struct format
# describes and returns the decoded Python value.
uint8 = lambda x: struct.unpack('>B', x)[0]
uint16 = lambda x: struct.unpack('>H', x)[0]
int32 = lambda x: struct.unpack('>l', x)[0]
uint32 = lambda x: struct.unpack('>L', x)[0]
int64 = lambda x: struct.unpack('>q', x)[0]
float32 = lambda x: struct.unpack('>f', x)[0]
# An 8-byte double needs the 'd' format; 'f' only accepts 4 bytes, so using it
# here made every 8-byte read raise struct.error.
float64 = lambda x: struct.unpack('>d', x)[0]

# Module-level constant pool: rebound by main() and read by read_attrs().
cp = []
class ContantPool:
    """Index-addressable view over raw ``(tag, value)`` constant-pool entries.

    Subscripting resolves references recursively:

    * tags 7 and 8 hold a single index, so the entry it points at is returned;
    * tags 9, 10, 11 and 12 hold a pair of indices, so a 2-tuple of the two
      resolved entries is returned;
    * any other tag yields the stored value unchanged.
    """

    # Tags whose value is one index into the pool.
    _SINGLE_REF_TAGS = (7, 8)
    # Tags whose value is a pair of indices into the pool.
    _PAIR_REF_TAGS = (9, 10, 11, 12)

    def __init__(self, pool):
        self.pool = pool

    def __getitem__(self, index):
        tag, const = self.pool[index]
        if tag in self._SINGLE_REF_TAGS:
            return self[const]
        if tag in self._PAIR_REF_TAGS:
            left = self[const[0]]
            right = self[const[1]]
            return (left, right)
        return const
def read_acc_flag(acc_table, acc):
    """Return the names of every flag in ``acc_table`` whose bit is set in ``acc``.

    ``acc_table`` maps bit masks to names; the result follows the table's
    iteration order.
    """
    return [label for mask, label in acc_table.items() if acc & mask]
def read_contant_pool(stream, pool_size):
    """Parse the class-file constant pool from ``stream``.

    Args:
        stream: Binary stream positioned at the first constant-pool entry.
        pool_size: The ``constant_pool_count`` value; usable indices run from
            1 to ``pool_size - 1``.

    Returns:
        A ``ContantPool`` wrapping a list of ``(tag, value)`` tuples. Slot 0
        is ``None``, and the unusable slot that follows each long/double
        entry is ``(None, None)``.

    An unknown tag prints a message and stops parsing; the entries read so
    far are still returned.
    """
    # constant pool index start with 1
    pool = [None]
    # Loop on the number of filled slots rather than the number of entries:
    # long and double constants each take up two slots.
    while len(pool) < pool_size:
        tag = uint8(stream.read(1))
        if tag == 1:
            # Utf8: u2 byte length followed by the encoded text.
            # NOTE(review): class files use "modified UTF-8"; strict decoding
            # may reject some strings (e.g. embedded NUL) - confirm if needed.
            size = uint16(stream.read(2))
            const = stream.read(size).decode('utf-8')
        elif tag == 3:
            const = int32(stream.read(4))
        elif tag == 4:
            const = float32(stream.read(4))
        elif tag == 5:
            const = int64(stream.read(8))
        elif tag == 6:
            # Double: decoded here as an 8-byte big-endian IEEE 754 value.
            const = struct.unpack('>d', stream.read(8))[0]
        elif tag in (7, 8, 16, 19, 20):
            # Class, String, MethodType, Module, Package: one u2 index.
            const = uint16(stream.read(2))
        elif tag in (9, 10, 11, 12, 17, 18):
            # Field/Method/InterfaceMethod refs, NameAndType, Dynamic and
            # InvokeDynamic: a pair of u2 values.
            const = (uint16(stream.read(2)), uint16(stream.read(2)))
        elif tag == 15:
            # MethodHandle: u1 reference kind followed by a u2 index.
            const = (uint8(stream.read(1)), uint16(stream.read(2)))
        else:
            print('not found cp tag', tag)
            break
        pool.append((tag, const))
        if tag in (5, 6):
            # Keep later indices aligned: the slot after a long/double is
            # reserved and never referenced.
            pool.append((None, None))
    return ContantPool(pool)
def read_attrs(stream, size):
    """Read ``size`` attribute records from ``stream`` into a dict.

    Each record is a u2 name index (resolved through the module-level ``cp``),
    a u4 byte length, and that many bytes of payload. The result maps the
    resolved name to the raw payload bytes; a repeated name keeps only the
    last payload.
    """
    collected = {}
    remaining = size
    while remaining > 0:
        attr_name = cp[uint16(stream.read(2))]
        payload_size = uint32(stream.read(4))
        collected[attr_name] = stream.read(payload_size)
        remaining -= 1
    return collected
def read_fields(stream, cp, size):
    """Read ``size`` field records from ``stream``.

    Returns a list of ``(access_flag_names, name, descriptor, attrs)`` tuples,
    where ``name`` and ``descriptor`` are looked up in ``cp`` and ``attrs`` is
    the dict produced by ``read_attrs``.
    """
    # Bit mask -> modifier name for field access flags.
    field_flag_names = {
        0x0001: 'public',
        0x0002: 'private',
        0x0004: 'protected',
        0x0008: 'static',
        0x0010: 'final',
        0x0040: 'volatile',
        0x0080: 'transient',
        0x1000: 'synthetic',
        0x4000: 'enum',
    }
    result = []
    for _ in range(size):
        flag_bits = uint16(stream.read(2))
        field_name = cp[uint16(stream.read(2))]
        field_descriptor = cp[uint16(stream.read(2))]
        attributes = read_attrs(stream, uint16(stream.read(2)))
        record = (
            read_acc_flag(field_flag_names, flag_bits),
            field_name,
            field_descriptor,
            attributes,
        )
        result.append(record)
    return result
def read_methods(stream, cp, size):
    """Read ``size`` method records from ``stream``.

    Returns a list of ``(access_flag_names, name, descriptor, attrs)`` tuples,
    where ``name`` and ``descriptor`` are looked up in ``cp`` and ``attrs`` is
    the dict produced by ``read_attrs``.
    """
    # Bit mask -> modifier name for method access flags.
    method_flag_names = {
        0x0001: 'public',
        0x0002: 'private',
        0x0004: 'protected',
        0x0008: 'static',
        0x0010: 'final',
        0x0020: 'synchronized',
        0x0040: 'bridge',
        0x0080: 'varargs',
        0x0100: 'native',
        0x0400: 'abstract',
        0x0800: 'strict',
        0x1000: 'synthetic',
    }

    def read_one():
        # Field order in the stream: flags, name index, descriptor index,
        # attribute count, then the attributes themselves.
        modifiers = read_acc_flag(method_flag_names, uint16(stream.read(2)))
        method_name = cp[uint16(stream.read(2))]
        method_descriptor = cp[uint16(stream.read(2))]
        attribute_total = uint16(stream.read(2))
        return (
            modifiers,
            method_name,
            method_descriptor,
            read_attrs(stream, attribute_total),
        )

    return [read_one() for _ in range(size)]
def read_header(stream):
    """Read the minor/major version and the constant pool from ``stream``.

    Returns ``(version, major, minor, cp)`` where ``version`` is the release
    label for ``major`` (``None`` when the major version is not in the table)
    and ``cp`` is the parsed constant pool.
    """
    # Class-file major version -> release label.
    release_by_major = {
        45: 'JDK 1.1',
        46: 'JDK 1.2',
        47: 'JDK 1.3',
        48: 'JDK 1.4',
        49: 'Java SE 5.0',
        50: 'Java SE 6.0',
        51: 'Java SE 7',
        52: 'Java SE 8',
        53: 'Java SE 9',
        54: 'Java SE 10',
        55: 'Java SE 11',
        56: 'Java SE 12',
        57: 'Java SE 13',
        58: 'Java SE 14',
    }
    # The minor version is stored before the major version.
    minor = uint16(stream.read(2))
    major = uint16(stream.read(2))
    constant_pool = read_contant_pool(stream, uint16(stream.read(2)))
    return (release_by_major.get(major), major, minor, constant_pool)
def read_body(stream, cp):
    """Read everything that follows the constant pool in a class file.

    Args:
        stream: Binary stream positioned just after the constant pool.
        cp: Constant pool used to resolve name indices.

    Returns:
        ``(acc_flags, class_name, super_name, fields, methods, attrs)``.
        ``super_name`` is ``None`` when the class has no superclass entry
        (index 0, as for ``java.lang.Object``).
    """
    acc_table = {
        0x0001: 'public',
        0x0010: 'final',
        0x0020: 'super',
        0x0200: 'interface',
        0x0400: 'abstract',
        0x1000: 'synthetic',
        0x2000: 'annotation',
        0x4000: 'enum',
    }
    access_flags = uint16(stream.read(2))
    class_idx = uint16(stream.read(2))
    super_idx = uint16(stream.read(2))
    interface_size = uint16(stream.read(2))
    # The interface table is ``interface_size`` u2 indices. They are not part
    # of the returned tuple, but they must be consumed: otherwise the field
    # and method tables are parsed from the wrong offset for any class that
    # implements an interface.
    stream.read(2 * interface_size)
    field_size = uint16(stream.read(2))
    fields = read_fields(stream, cp, field_size)
    method_size = uint16(stream.read(2))
    methods = read_methods(stream, cp, method_size)
    attr_count = uint16(stream.read(2))
    attrs = read_attrs(stream, attr_count)
    acc_flags = read_acc_flag(acc_table, access_flags)
    class_name = cp[class_idx]
    # Index 0 is not a valid pool entry; it marks "no superclass".
    super_name = cp[super_idx] if super_idx != 0 else None
    return (
        acc_flags, class_name, super_name,
        fields, methods, attrs,
    )
def format_method_description(desc):
    """Render a JVM method descriptor as ``(param, param, ...): return``.

    Parameters are split into individual field types: primitives (one of
    ``BCDFIJSZ``), object types (``L...;``, shown without the trailing ``;``)
    and arrays (any of those prefixed by ``[``). The return type is shown
    exactly as it appears in the descriptor.

    Args:
        desc: Descriptor string such as ``(ILjava/lang/String;)V``.

    Returns:
        The formatted string, e.g. ``(I, Ljava/lang/String): V``.

    Raises:
        ValueError: If ``desc`` does not have the ``(params)return`` shape.
    """
    mo = re.search(r'^\(([^\)]*)\)([\S]+)$', desc)
    if mo is None:
        raise ValueError(f'not a method descriptor: {desc!r}')
    # Splitting on ';' loses or merges primitive parameters, so tokenize each
    # field type instead.
    tokens = re.findall(r'\[*(?:[BCDFIJSZ]|L[^;]+;)', mo.group(1))
    params = [token.rstrip(';') for token in tokens]
    ret = mo.group(2)
    return f"({', '.join(params)}): {ret}"
def main(name, cmd, params):
    """Inspect ``./<name>.class`` according to ``cmd``.

    Args:
        name: Class file name without the ``.class`` suffix, relative to the
            current directory.
        cmd: ``'methods'`` lists every method with its index; ``'method'``
            prints one method's declaration and its decoded bytecode.
        params: Extra arguments; for ``'method'`` the first one is the method
            index as printed by ``'methods'``.
    """
    # read_attrs() resolves names through the module-level ``cp``, so the
    # parsed pool has to be published there before the body is read.
    global cp
    with open(f'./{name}.class', 'rb') as f:
        if f.read(4) != b'\xca\xfe\xba\xbe':
            print(f'./{name}.class is not a Java class file', file=sys.stderr)
            return
        version, major, minor, cp = read_header(f)
        acc_flags, class_name, super_name, fields, methods, attrs = \
            read_body(f, cp)

    def describe(method):
        # method is (access_flag_names, name, descriptor, attrs)
        return f"{' '.join(method[0])} {method[1]} {format_method_description(method[2])}"

    if cmd == 'methods':
        for i, method in enumerate(methods):
            print(f'{i}: {describe(method)}')
    elif cmd == 'method':
        index = int(params[0])
        method = methods[index]
        print(describe(method))
        code = method[3].get('Code')
        if code is None:
            # No Code attribute (e.g. abstract or native methods): there is
            # nothing to disassemble.
            print('(no Code attribute)')
            return
        codes = read_code_attribute(cp, code)
        print('\n'.join(map(lambda x: f'{x.offset}: {x}', codes)))
if __name__ == '__main__':
    # Invocation: <script> NAME CMD [PARAMS...]
    # Exit with a usage line instead of an IndexError traceback when the two
    # required arguments are missing.
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} <class-name> <methods|method> [method-index]')
    name = sys.argv[1]
    cmd = sys.argv[2]
    main(name, cmd, sys.argv[3:])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from io import BytesIO | |
import struct | |
def uint8(x):
    """Decode a 1-byte big-endian unsigned integer."""
    return struct.unpack('>B', x)[0]


def int16(x):
    """Decode a 2-byte big-endian signed integer."""
    return struct.unpack('>h', x)[0]


def uint16(x):
    """Decode a 2-byte big-endian unsigned integer."""
    return struct.unpack('>H', x)[0]


def uint32(x):
    """Decode a 4-byte big-endian unsigned integer."""
    return struct.unpack('>L', x)[0]
class OpParser:
    """Describes how to decode one opcode.

    Attributes are set from the constructor arguments:
        mnemonic: Instruction name used for display.
        param_size: Number of operand bytes that follow the opcode byte.
        reader: Optional callable ``reader(parser, cp, data)`` that turns the
            operand bytes into a list of displayable values.
    """

    def __init__(self, mnemonic, param_size=0, reader=None):
        self.mnemonic = mnemonic
        self.param_size = param_size
        self.reader = reader

    def parse(self, stream, cp, offset):
        """Decode the operands of one instruction whose opcode was just read.

        Args:
            stream: Stream positioned right after the opcode byte.
            cp: Constant pool passed through to the reader.
            offset: Byte offset of this instruction's opcode.

        Returns:
            ``(op, next_offset)`` where ``next_offset`` is the offset of the
            following instruction.
        """
        # Reading zero bytes returns b'' and leaves the stream untouched, so
        # read unconditionally; ``data`` is then always bound even when a
        # reader is configured with no operand bytes.
        data = stream.read(self.param_size)
        params = None
        if self.reader is not None:
            params = self.reader(self, cp, data)
        op = Op(self.mnemonic, params, offset)
        # One byte for the opcode itself plus its operand bytes.
        return op, offset + 1 + self.param_size
class Op:
    """One decoded instruction: mnemonic, operand list (or ``None``) and offset."""

    def __init__(self, mnemonic, params, offset):
        self.offset = offset
        self.params = params
        self.mnemonic = mnemonic

    def __repr__(self):
        # Instructions without operands print as the bare mnemonic; otherwise
        # the operands follow it, separated by single spaces.
        if self.params is not None:
            rendered = ' '.join(str(param) for param in self.params)
            return f"{self.mnemonic} {rendered}"
        return self.mnemonic
def index_reader(builder, cp, data):
    """Resolve a constant-pool index operand.

    Args:
        builder: The parser describing the instruction; only its
            ``param_size`` is read, to pick the index width.
        cp: Constant pool (anything indexable by integer).
        data: The operand bytes.

    Returns:
        A single-element list holding the resolved constant. With a one-byte
        index a string constant is returned UTF-8 encoded (as bytes); any
        other constant (e.g. an int or float loaded by ``ldc``) is returned
        as-is. ``None`` is returned for any other operand width.
    """
    if builder.param_size == 2:
        (index,) = struct.unpack('>H', data)
        return [cp[index]]
    if builder.param_size == 1:
        (index,) = struct.unpack('>B', data)
        constant = cp[index]
        # Only strings can be encoded; numeric constants used to crash here
        # with AttributeError.
        if isinstance(constant, str):
            return [constant.encode('utf-8')]
        return [constant]
    return None
def uint8_reader(builder, cp, data):
    """Decode ``data`` as one unsigned byte, returned as a one-element list.

    ``builder`` and ``cp`` are part of the common reader signature and are
    not used.
    """
    return list(struct.unpack('>B', data))
def uint16_reader(builder, cp, data):
    """Decode ``data`` as a big-endian unsigned 16-bit value, in a one-element list.

    ``builder`` and ``cp`` are part of the common reader signature and are
    not used.
    """
    return list(struct.unpack('>H', data))
def int16_reader(builder, cp, data):
    """Decode ``data`` as a big-endian signed 16-bit value, in a one-element list.

    ``builder`` and ``cp`` are part of the common reader signature and are
    not used.
    """
    return list(struct.unpack('>h', data))
def invoke_interface_reader(builder, cp, data):
    """Decode the four operand bytes of ``invokeinterface``.

    The first two bytes form a big-endian constant-pool index; the third and
    fourth bytes are returned as plain integers after the resolved constant.
    ``builder`` is not used.
    """
    (pool_index,) = struct.unpack('>H', data[:2])
    target = cp[pool_index]
    return [target, data[2], data[3]]
def iinc_reader(builder, cp, data):
    """Decode the two operand bytes of ``iinc``.

    Args:
        builder: Unused; part of the common reader signature.
        cp: Unused; part of the common reader signature.
        data: Two bytes: the local-variable index, then the increment.

    Returns:
        ``[index, increment]``. The index is unsigned; the increment is a
        signed byte, so a decrement such as ``i--`` decodes as ``-1`` rather
        than ``255``.
    """
    index, increment = struct.unpack('>Bb', data)
    return [index, increment]
# Type code carried by a one-byte operand -> primitive type name.
_ARRAY_TYPE_NAMES = {
    4: 'BOOLEAN',
    5: 'CHAR',
    6: 'FLOAT',
    7: 'DOUBLE',
    8: 'BYTE',
    9: 'SHORT',
    10: 'INT',
    11: 'LONG',
}


def primitive_type_reader(builder, cp, data):
    """Translate a one-byte primitive type code into its name.

    Returns a one-element list with the name; a code outside 4..11 raises
    ``KeyError``. ``builder`` and ``cp`` are not used.
    """
    (type_code,) = struct.unpack('>B', data)
    return [_ARRAY_TYPE_NAMES[type_code]]
# Opcode byte -> parser for the instructions this disassembler understands.
# Entries are listed in ascending opcode order, grouped by instruction family.
code_table = {
    # constants
    b'\x00': OpParser('nop'),
    b'\x01': OpParser('aconst_null'),
    b'\x03': OpParser('iconst_0'),
    b'\x04': OpParser('iconst_1'),
    b'\x05': OpParser('iconst_2'),
    b'\x06': OpParser('iconst_3'),
    b'\x07': OpParser('iconst_4'),
    b'\x08': OpParser('iconst_5'),
    b'\x0c': OpParser('fconst_1'),
    b'\x0d': OpParser('fconst_2'),
    b'\x12': OpParser('ldc', 1, index_reader),
    # loads
    b'\x15': OpParser('iload', 1, uint8_reader),
    b'\x19': OpParser('aload', 1, uint8_reader),
    b'\x1b': OpParser('iload_1'),
    b'\x1c': OpParser('iload_2'),
    b'\x1d': OpParser('iload_3'),
    b'\x2a': OpParser('aload_0'),
    b'\x2b': OpParser('aload_1'),
    b'\x2c': OpParser('aload_2'),
    b'\x2d': OpParser('aload_3'),
    b'\x32': OpParser('aaload'),
    # stores
    b'\x36': OpParser('istore', 1, uint8_reader),
    b'\x3a': OpParser('astore', 1, uint8_reader),
    b'\x3c': OpParser('istore_1'),
    b'\x3d': OpParser('istore_2'),
    b'\x3e': OpParser('istore_3'),
    b'\x41': OpParser('lstore_2'),
    b'\x4b': OpParser('astore_0'),
    b'\x4c': OpParser('astore_1'),
    b'\x4d': OpParser('astore_2'),
    b'\x4e': OpParser('astore_3'),
    b'\x55': OpParser('castore'),
    # operand stack
    b'\x57': OpParser('pop'),
    b'\x58': OpParser('pop2'),
    b'\x59': OpParser('dup'),
    b'\x5b': OpParser('dup_x2'),
    b'\x5f': OpParser('swap'),
    # arithmetic and conversion
    b'\x64': OpParser('isub'),
    b'\x78': OpParser('ishl'),
    b'\x82': OpParser('ixor'),
    b'\x84': OpParser('iinc', 2, iinc_reader),
    b'\x92': OpParser('i2c'),
    # branches (operand is a signed 16-bit value)
    b'\x99': OpParser('ifeq', 2, int16_reader),
    b'\x9a': OpParser('ifne', 2, int16_reader),
    b'\x9c': OpParser('ifge', 2, int16_reader),
    b'\xa2': OpParser('if_icmpge', 2, int16_reader),
    b'\xa5': OpParser('if_acmpeq', 2, int16_reader),
    b'\xa7': OpParser('goto', 2, int16_reader),
    # returns
    b'\xac': OpParser('ireturn'),
    b'\xb0': OpParser('areturn'),
    b'\xb1': OpParser('return'),
    # fields, invocations, objects and arrays
    b'\xb2': OpParser('getstatic', 2, index_reader),
    b'\xb6': OpParser('invokevirtual', 2, index_reader),
    b'\xb7': OpParser('invokespecial', 2, index_reader),
    b'\xb8': OpParser('invokestatic', 2, index_reader),
    b'\xb9': OpParser('invokeinterface', 4, invoke_interface_reader),
    b'\xbb': OpParser('new', 2, index_reader),
    b'\xbc': OpParser('newarray', 1, primitive_type_reader),
    b'\xbd': OpParser('anewarray', 2, index_reader),
    b'\xbe': OpParser('arraylength'),
    b'\xbf': OpParser('athrow'),
    b'\xc0': OpParser('checkcast', 2, index_reader),
    # null checks
    b'\xc6': OpParser('ifnull', 2, int16_reader),
    b'\xc7': OpParser('ifnonnull', 2, int16_reader),
}
def read_byte_code(cp, stream):
    """Decode instructions from ``stream`` until it is exhausted.

    Returns the list of decoded ops. An opcode missing from ``code_table``
    prints a "code not found" message followed by a blank line and stops
    decoding; the ops decoded so far are returned.
    """
    decoded = []
    position = 0
    # iter() with a sentinel stops as soon as read(1) returns b'' (end of code).
    for op_code in iter(lambda: stream.read(1), b''):
        parser = code_table.get(op_code)
        if parser is None:
            print(f"code not found {hex(uint8(op_code))}")
            print()
            break
        op, position = parser.parse(stream, cp, position)
        decoded.append(op)
    return decoded
def read_code_attribute(cp, code_attribute):
    """Decode the bytecode held in the raw bytes of a ``Code`` attribute.

    The payload starts with a u2, a u2 and a u4 (max stack, max locals, code
    length); only the code length is used, to slice out the instruction bytes
    that are handed to ``read_byte_code``. Returns the decoded ops.
    """
    payload = BytesIO(code_attribute)
    _max_stack, _max_locals, code_length = struct.unpack('>HHL', payload.read(8))
    instruction_bytes = payload.read(code_length)
    return read_byte_code(cp, BytesIO(instruction_bytes))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment