Skip to content

Instantly share code, notes, and snippets.

@fbwright
Created May 30, 2015 14:48
Show Gist options
  • Save fbwright/b9d7c3fcfe573b3c63e6 to your computer and use it in GitHub Desktop.
Save fbwright/b9d7c3fcfe573b3c63e6 to your computer and use it in GitHub Desktop.
Simple assembler for a simple VM
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function, division
import sys
from time import sleep
# Python 2 compatibility: there input() evaluates what is typed, so use
# raw_input() under the same name to always get the line as a string.
if sys.version_info[0] < 3:
    input = raw_input
#THIS! IS! A! FUCKING! MESS!
#What follows was cobbled together in ~1 hour of sleepy boredom*,
# and thus may be filled with hacks, not-well-thought-out names and
# other zany things. Blaargh.
#It fails on error (on any error), it's not readily extensible (if
# it can be extended at all) and all-around bad.
#I hate christmas.
#* Und then extended on the following day, still sleepy-bored...
# I would give a finger for a good night's sleep.
#
#The only good things that I can say about this code is that it
# works. Kinda, sorta, if you don't look at it too closely. Not
# on wednesdays.
#TODO: I need to fucking refactor the following...
def dByte(params, line_number):
    """Assemble the DBYTE pseudo-instruction: one raw byte per parameter.

    Each parameter is a decimal literal or a hexadecimal literal prefixed
    with ``0x``.  Negative values are stored in two's complement.  A value
    that does not fit in a byte is reported and truncated to its low 8 bits,
    so the bytes that follow keep their position.

    The first parameter that is not a number is treated as a label: a single
    zero byte is reserved for it and assembly of this directive stops there
    (parameters after the label are ignored, since only one label can be
    returned to the caller).

    Args:
        params: list of parameter strings.
        line_number: source line number, used only in error messages.

    Returns:
        A ``(data, label)`` tuple where ``data`` is a bytearray and ``label``
        is the label name, or ``None`` when every parameter was numeric.
    """
    data = bytearray()
    for param in params:
        try:
            value = int(param, 16 if param.startswith("0x") else 10)
        except ValueError:
            # Not a number, so it must be a label: reserve its byte.
            data.append(0)
            return data, param
        if not -0x80 <= value <= 0xFF:
            print("Error: Number out of bounds %s (%2X) at line %s." % (value, value, line_number))
        # Masking both truncates oversized values and encodes negatives.
        data.append(value & 0xFF)
    return data, None
def dWord(params, line_number):
    """Assemble the DWORD pseudo-instruction: one 16-bit word per parameter.

    Words are emitted high byte first.  Each parameter is a decimal literal
    or a hexadecimal literal prefixed with ``0x``; negative values are stored
    in two's complement, and values that do not fit in 16 bits are reported
    and truncated.

    The first parameter that is not a number is treated as a label: two zero
    bytes are reserved for it and assembly of this directive stops there.

    Args:
        params: list of parameter strings.
        line_number: source line number, used only in error messages.

    Returns:
        A ``(data, label)`` tuple where ``data`` is a bytearray and ``label``
        is the label name, or ``None`` when every parameter was numeric.
    """
    data = bytearray()
    for param in params:
        try:
            value = int(param, 16 if param.startswith("0x") else 10)
        except ValueError:
            # Not a number, so it must be a label: reserve its word.
            data.extend(bytearray(2))
            return data, param
        if not -0x8000 <= value <= 0xFFFF:
            print("Error: Number out of bounds %s (%2X) at line %s." % (value, value, line_number))
        value &= 0xFFFF
        data.append(value >> 8)
        data.append(value & 0xFF)
    return data, None
# Assembler directives: they emit raw data instead of an opcode.  Each maps
# a directive name to the function that assembles its parameters.
PseudoInstructions = dict(
    DBYTE=dByte,
    DWORD=dWord,
)
# Mnemonic -> opcode byte.
Instructions = {
    "NOP":   0x00,  # 0b00000000
    # Stack manipulation
    "PUSH":  0x20,  # 0b00100000
    "DROP":  0x21,  # 0b00100001
    "DUP":   0x22,  # 0b00100010
    "SWAP":  0x23,  # 0b00100011
    "PICK":  0x24,  # 0b00100100
    # Arithmetic
    "ADD":   0x40,  # 0b01000000
    "SUB":   0x41,  # 0b01000001
    # Control flow
    "JUMP":  0x7F,  # 0b01111111
    "JREL":  0x70,  # 0b01110000
    "JZ":    0x60,  # 0b01100000
    # Output
    "PRINT": 0xE0,  # 0b11100000
}
def no_parameters(func):
    """Decorate an ``op_<NAME>`` handler that takes no operands.

    The wrapper reports an error when parameters are supplied and then still
    delegates to the handler, returning whatever the handler returns.

    Args:
        func: handler with signature ``(params, line_number)``; the text
            after the first three characters of its name is used as the
            instruction name in the error message.

    Returns:
        The wrapping function, carrying the handler's name and docstring.
    """
    from functools import wraps

    @wraps(func)  # keep func.__name__ so stacked decorators report the right name
    def func_wrapper(params, line_number):
        if params:
            print("Error: %s does not need any parameters - at line %s." % (func.__name__[3:], line_number))
        return func(params, line_number)
    return func_wrapper
def one_parameter(func):
    """Decorate an ``op_<NAME>`` handler that takes exactly one operand.

    The wrapper reports an error when the parameter count is not one and then
    still delegates to the handler, returning whatever the handler returns.

    Args:
        func: handler with signature ``(params, line_number)``; the text
            after the first three characters of its name is used as the
            instruction name in the error message.

    Returns:
        The wrapping function, carrying the handler's name and docstring.
    """
    from functools import wraps

    @wraps(func)  # keep func.__name__ so stacked decorators report the right name
    def func_wrapper(params, line_number):
        if len(params) != 1:
            print("Error: %s requires exactly one parameter - at line %s." % (func.__name__[3:], line_number))
        return func(params, line_number)
    return func_wrapper
def word(func):
    """Decorate an ``op_<NAME>`` handler whose first operand must be a word.

    The wrapper tries to parse ``params[0]`` with ``getWord``.  If the
    parameter is missing or is not a number, an error is reported.  Either
    way the handler is then called and its result returned.

    Args:
        func: handler with signature ``(params, line_number)``; the text
            after the first three characters of its name is used as the
            instruction name in the error message.

    Returns:
        The wrapping function, carrying the handler's name and docstring.
    """
    from functools import wraps

    @wraps(func)  # keep func.__name__ so stacked decorators report the right name
    def func_wrapper(params, line_number):
        try:
            getWord(params[0], line_number)
        except (ValueError, IndexError):
            # ValueError: not a number; IndexError: no parameter at all.
            print("Error: %s expects a word parameter - at line %s." % (func.__name__[3:], line_number))
        return func(params, line_number)
    return func_wrapper
def newbase(op):
    """Return a new bytearray containing only the opcode of mnemonic *op*.

    Args:
        op: a key of ``Instructions``.

    Returns:
        A one-byte bytearray that operand bytes can be appended to.

    Raises:
        KeyError: if *op* is not a known mnemonic.
    """
    return bytearray([Instructions[op]])
# Per-instruction handlers, named op_<MNEMONIC>.  Each takes the parameter
# list and the source line number.  The operand-less ones return
# (newbase(...), None); the remaining ones are still unimplemented stubs
# that return None.

@no_parameters
def op_NOP(params, line_number):
    return newbase("NOP"), None


@one_parameter
@word
def op_PUSH(params, line_number):
    # TODO: not implemented yet.
    pass


@no_parameters
def op_DROP(params, line_number):
    return newbase("DROP"), None


@no_parameters
def op_DUP(params, line_number):
    return newbase("DUP"), None


@no_parameters
def op_SWAP(params, line_number):
    return newbase("SWAP"), None


def op_PICK(params, line_number):
    # TODO: not implemented yet.
    pass


@no_parameters
def op_ADD(params, line_number):
    return newbase("ADD"), None


@no_parameters
def op_SUB(params, line_number):
    return newbase("SUB"), None


def op_JUMP(params, line_number):
    # TODO: not implemented yet.
    pass


def op_JZ(params, line_number):
    # TODO: not implemented yet.
    pass


def op_JREL(params, line_number):
    # TODO: not implemented yet.
    pass


@no_parameters
def op_PRINT(params, line_number):
    return newbase("PRINT"), None
# Publish every opcode as a module-level constant named after its mnemonic
# (NOP, PUSH, ...); the VM compares fetched bytes against these names.
# Done with plain dict operations rather than exec().
globals().update(Instructions)

# Mnemonic -> handler function, looked up by the op_<MNEMONIC> naming scheme.
Instr_Funct = {name: globals()["op_" + name] for name in Instructions}

# Opcodes followed by a one-byte operand.
ParamByte = (Instructions["PICK"], )

# Opcodes followed by a two-byte operand (high byte first).  JREL belongs
# here because the VM reads a word offset after it.
# NOTE(review): a *label* operand is patched with an absolute address, which
# is not a meaningful JREL offset - use numeric offsets with JREL.
ParamWord = (
    Instructions["PUSH"],
    Instructions["JUMP"],
    Instructions["JZ"],
    Instructions["JREL"],
)
def execute(bytecode, debug=0):
    """Run *bytecode* on the stack machine.

    Execution starts at offset 0 and stops as soon as the program counter
    leaves the bytecode.  Bytes that are not a known opcode are skipped.

    Operands follow the opcode.  PUSH and JREL read a signed 16-bit word
    (high byte first), JUMP and JZ read an unsigned 16-bit address, PICK
    reads one byte.  Operand bytes past the end of the bytecode read as 0.

    Note that, despite its name, JZ jumps when the top of the stack is NOT
    zero, and it leaves the stack untouched.

    Args:
        bytecode: indexable sequence of ints (e.g. a bytearray).
        debug: when true, print the machine state before each instruction
            and pause 0.2 seconds.

    Raises:
        IndexError: on stack underflow.
    """
    stack = []
    pc = 0
    size = len(bytecode)
    # The lower bound stops a backwards JREL from wrapping around through
    # Python's negative indexing.
    while 0 <= pc < size:
        opcode = bytecode[pc]
        word_h = bytecode[pc + 1] if pc + 1 < size else 0
        word_l = bytecode[pc + 2] if pc + 2 < size else 0
        address = (word_h << 8) | word_l
        # Reinterpret the operand as a signed 16-bit value.
        word = address - 0x10000 if address >= 0x8000 else address
        if debug:
            print("PC: %s\t%02X %02X %02X\t[%s] %s" % (pc, opcode, word_h, word_l, len(stack), " ".join(str(i) for i in stack)))
            sleep(0.2)
        pc += 1
        if opcode == NOP:
            pass
        elif opcode == PUSH:
            stack.append(word)
            pc += 2
        elif opcode == DROP:
            stack.pop()
        elif opcode == DUP:
            stack.append(stack[-1])
        elif opcode == SWAP:
            a, b = stack.pop(), stack.pop()
            stack.append(a)
            stack.append(b)
        elif opcode == PICK:
            stack.append(stack[-word_h])
            pc += 1  # skip the one-byte operand
        elif opcode == ADD:
            stack.append(stack.pop() + stack.pop())
        elif opcode == SUB:
            stack.append(-stack.pop() + stack.pop())
        elif opcode == PRINT:
            print(stack.pop())
        elif opcode == JUMP:
            pc = address
        elif opcode == JREL:
            # The offset is relative to the instruction after JREL.
            pc += 2 + word
        elif opcode == JZ:
            if stack[-1] != 0:
                pc = address
            else:
                pc += 2
def drop_comments(line):
    """Return *line* up to (not including) its first '#'.

    A line without '#' is returned unchanged; whitespace is not stripped.
    """
    code, _, _ = line.partition("#")
    return code
def tokenize(program):
    """Split source lines into ``[label, [instruction, param, ...]]`` tokens.

    For every line the comment is dropped, an optional ``label:`` prefix is
    split off at the first ':', the instruction is separated from its
    parameters by whitespace, and the parameters are split on ','.
    Labels and parameters are stripped of surrounding whitespace.

    Args:
        program: iterable of source lines.

    Returns:
        A ``(tokens, labels)`` tuple.  ``tokens`` has one entry per input
        line; the label is ``""`` when the line has none and the inner list
        is empty when the line has no instruction.  ``labels`` maps every
        label found to ``-1`` (its address is not known yet).
    """
    labels = {}
    tokens = []
    for line in program:
        code = drop_comments(line)
        label, colon, rest = code.partition(":")
        if colon:
            label = label.strip()
            if label:
                labels[label] = -1
        else:
            label, rest = "", code
        # Positional form of maxsplit: the keyword is rejected on Python 2.
        fields = rest.split(None, 1)
        if len(fields) > 1:
            fields[1:] = [param.strip() for param in fields[1].split(",")]
        tokens.append([label, fields])
    return tokens, labels
def getWord(string, line_number):
    """Parse *string* as a 16-bit word and return it as two bytes.

    The literal is decimal, or hexadecimal when prefixed with ``0x``.
    Negative values are stored in two's complement.  A value that still does
    not fit in 16 bits is reported and truncated to its low 16 bits.

    Args:
        string: the literal to parse.
        line_number: source line number, used only in the error message.

    Returns:
        A two-byte bytearray, high byte first.

    Raises:
        ValueError: if *string* is not a number (callers treat that as
            "this is a label"); never raised for a number that is merely
            out of range.
    """
    value = int(string, 16 if string.startswith("0x") else 10)
    if value < 0:
        value += 0x10000
    if not 0 <= value <= 0xFFFF:
        word_h, word_l = value >> 8, value & 0xFF
        print("Error: Number out of bounds %s (%2X %2X) at line %s - truncated (%2X %2X)." % (value, word_h, word_l, line_number, word_h & 0xFF, word_l))
        value &= 0xFFFF
    return bytearray([value >> 8, value & 0xFF])
def getByte(string, line_number):
    """Parse *string* as a single byte and return it in a bytearray.

    The literal is decimal, or hexadecimal when prefixed with ``0x``.
    0x100 is added to negative values; if the result is still outside
    0..0xFF an error is printed and the value is truncated to 8 bits.

    Raises ValueError when *string* is not a number.
    """
    base = 16 if string.startswith("0x") else 10
    value = int(string, base)
    if value < 0:
        value += 0x100
    if not 0x00 <= value <= 0xFF:
        print("Error: Byte out of bounds %s (%2X) at line %s - truncated (%2X)." % (value, value, line_number, value & 0xFF))
        value &= 0xFF
    return bytearray([value])
def emit(instr, params, line_number):
    """Assemble one instruction or pseudo-instruction.

    Pseudo-instructions are delegated to their function in
    ``PseudoInstructions``.  For a real instruction the opcode is emitted,
    followed by its operands: one byte each for opcodes in ``ParamByte``,
    one word each for opcodes in ``ParamWord``.  Parameters given to any
    other opcode are ignored.

    A word operand that is not a number is taken to be a label: two zero
    bytes are reserved for it and the label is returned so the caller can
    fill in the address later (parameters after the label are ignored).
    A byte operand cannot hold an address, so a non-numeric one is reported
    and assembled as 0.

    Args:
        instr: mnemonic or pseudo-instruction name.
        params: list of parameter strings.
        line_number: source line number, used only in error messages.

    Returns:
        A ``(code, label)`` tuple; ``label`` is ``None`` when there is no
        address to fill in.

    Raises:
        KeyError: if *instr* is neither a pseudo-instruction nor a mnemonic.
    """
    if instr in PseudoInstructions:
        return PseudoInstructions[instr](params, line_number)
    #else:
    #    return Instr_Funct[instr](params, line_number)
    opcode = Instructions[instr]
    code = bytearray([opcode])
    if opcode in ParamByte:
        for param in params:
            try:
                code.extend(getByte(param, line_number))
            except ValueError:
                print("Error: %s expects a byte parameter - at line %s." % (instr, line_number))
                code.append(0)
    elif opcode in ParamWord:
        for param in params:
            try:
                code.extend(getWord(param, line_number))
            except ValueError:
                # Not a number, so it must be a label: reserve its word.
                code.extend(bytearray(2))
                return code, param
    return code, None
def emit_bytecode(program, labels):
    """Assemble tokenized lines into bytecode, resolving labels.

    Two passes: the first assembles every line with ``emit``, records the
    address of each label definition in *labels* and remembers where label
    references left a placeholder; the second writes the label addresses
    into those placeholders.  A reference to an undefined label is reported
    and its placeholder is left as zero.

    Args:
        program: tokens shaped ``[label, [instruction, param, ...]]``.
        labels: dict of label names; updated in place with the addresses.

    Returns:
        The assembled bytecode as a bytearray.
    """
    fixups = []  # (label, placeholder offset, placeholder width, line)
    bytecode = bytearray()
    index = 0
    # First pass - find labels and emit instructions opcodes
    for line, token in enumerate(program):
        label = token[0]
        if label:
            labels[label] = index
        if not token[1]:
            continue
        instr, params = token[1][0], token[1][1:]
        byte, lbl = emit(instr, params, line)
        if lbl:
            # The placeholder is always the last thing emitted for the line.
            # DBYTE reserves one byte for a label, everything else a word.
            width = 1 if instr == "DBYTE" else 2
            fixups.append((lbl, index + len(byte) - width, width, line))
        bytecode.extend(byte)
        index += len(byte)
    # Second pass - emit labels' addresses
    for label, offset, width, line in fixups:
        try:
            address = labels[label]
        except KeyError:
            print("Error: Unknown label '%s' at line %s." % (label, line))
            #raise SystemExit
            continue
        if width == 2:
            bytecode[offset] = (address >> 8) & 0xFF
            offset += 1
        # For a one-byte placeholder only the low byte of the address fits.
        bytecode[offset] = address & 0xFF
    return bytecode
def disassemble(bytecode):
    """Split *bytecode* into one entry per instruction.

    This is not a real disassembly: the bytes are only grouped.  An opcode
    in ``ParamWord`` yields a slice of up to three bytes, an opcode in
    ``ParamByte`` a slice of up to two bytes, and any other byte is returned
    as a plain int.
    """
    pieces = []
    position = 0
    total = len(bytecode)
    while position < total:
        opcode = bytecode[position]
        if opcode in ParamWord:
            width = 3
        elif opcode in ParamByte:
            width = 2
        else:
            width = 1
        if width == 1:
            pieces.append(opcode)
        else:
            pieces.append(bytecode[position:position + width])
        position += width
    return pieces
def load():
    """Read a program interactively, one line per prompt.

    Input stops at a line consisting of ``@`` or at end of input.  The
    terminating ``@`` is not part of the returned program.

    Returns:
        The list of lines entered.
    """
    lines = []
    print("Write the program - '@' to stop:")
    while True:
        try:
            line = input(".. ")
        except EOFError:
            break
        if line == "@":
            break
        lines.append(line)
    return lines
def chunks(l, n):
    """Generate consecutive slices of l, each n items long.

    The final slice is shorter when len(l) is not a multiple of n.
    """
    for start in range(0, len(l), n):
        stop = start + n
        yield l[start:stop]
def hex_bytearray(b):
    """Format b as two-digit uppercase hex.

    A plain int gives a single pair of digits; any other value is iterated
    and its items are formatted and joined with single spaces.
    """
    if type(b) is not int:
        pairs = ["%02X" % item for item in b]
        return " ".join(pairs)
    return "%02X" % b
def hexdump(bytecode):
    """Print a hex dump of *bytecode*, 16 bytes per row.

    Each row shows the offset of its first byte, the bytes in hex and the
    same bytes as characters.  Control bytes (below 0x20, 0x7F and
    0x80-0x9F) are shown as '.' so they cannot disturb the terminal.

    Args:
        bytecode: sliceable sequence of ints (e.g. a bytearray).
    """
    BYTES = 16
    print("\nHEXDUMP - Dumped {0} bytes.".format(len(bytecode)))
    for index, chunk in enumerate(chunks(bytecode, BYTES)):
        offset = index * BYTES
        hex_column = "".join("%02X " % byte for byte in chunk)
        text_column = "".join(
            chr(byte) if 0x20 <= byte < 0x7F or byte > 0x9F else "."
            for byte in chunk)
        print("{0:0>8X} {1:{2}} {3:{4}}".format(
            offset, hex_column, 3 * BYTES, text_column, BYTES))
    print()
if __name__ == "__main__":
    # Demo: assemble the sample program below, print a listing and a hex
    # dump of the result, then run it on the VM.
    #program = load()
    # (An earlier placeholder program held only the comment
    #  "#Calculate fibonacci numbers 1..10".)
    # NOTE(review): the sample appears to contain bad lines on purpose
    # (stray parameters, undefined labels, out-of-range numbers) to
    # exercise the error paths - confirm before "fixing" it.
    program = \
"""
NOP 33
PUSH baa, ba
init:
PUSH 8 #This is a quite long comment that should be cut
PUSH 4
ADD
DUP
PRINT
loop:
PUSH 1
SUB
DUP
PRINT
JZ loop
JUMP end
DROP #All
PUSH 0xFFFF #this
PRINT #is
DROP #skipped
JUMP iniv #!!!1!
PUSH 3789652
PUSH 127
end:
PUSH -1111
PRINT
PUSH 0xFFFF
PRINT
PUSH 129
""".split("\n")

    tokens, labels = tokenize(program)
    bytecode = emit_bytecode(tokens, labels)

    # Listing: source line, its tokens and the bytes assembled for it.
    split_bytecode = disassemble(bytecode)
    s_i = 0  # index of the next unprinted entry of split_bytecode
    print("{0:6} {1:32} {2:30}{3}".format("LINE", "ORIGINAL", "TOKENIZED", "ASSEMBLED"))
    for line_number, (line, token) in enumerate(zip(program, tokens)):
        shown_source = line[:32]
        print("{0:>6} {1:<32} {2:<30}".format(line_number, shown_source, "%s" % token), end="")
        if token[1]:
            # Only lines holding an instruction produced bytes.
            print(hex_bytearray(split_bytecode[s_i]), end="")
            s_i += 1
        print()

    hexdump(bytecode)

    print("STARTING EXECUTION")
    execute(bytecode)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment