Skip to content

Instantly share code, notes, and snippets.

@pvsousalima
Created November 6, 2014 21:29
Show Gist options
  • Save pvsousalima/4c63e652a1a14a36bd20 to your computer and use it in GitHub Desktop.
Save pvsousalima/4c63e652a1a14a36bd20 to your computer and use it in GitHub Desktop.
A virtual stack machine interpreter written in python
# CSC 333 -- Homework 4 -- Stack machine interpreter
# Pedro Victor
# Time-stamp: <2014-10-21 21:04:52 shade>
# Your job is to complete the 'runvm' function. Don't change any of
# the other code. I will test your program using the 'run' function,
# which calls 'runvm' to do the heavy lifting.
#
# This code simulates a simple stack VM (virtual machine). Programs
# are written in a compact string form that is then compiled into a
# list of VM instructions. The tokens are integer literals, string
# literals in backquotes (not single quotes), names that contain one
# or more lowercase letters, and opcodes that are either single
# uppercase letters or single punctuation characters.
#
# The VM comprises a list of instructions, each of the form (opcode,
# arg); a runtime stack of integers; a list of inputs; a list of
# outputs; and a dictionary that maps variable names to their integer
# values. The default value for a variable is zero, but that can be
# changed with an S (store) instruction. The result from running the
# VM is a string containing the list of outputs joined together.
#
# See the doctests in the 'run' function for examples. The 'run'
# function calls the 'compile' function automatically, but you can use
# the 'compile' function by itself to see how programs are converted
# to lists of VM instructions.
#
# Below is a list of all the instructions. The first column shows how
# you write them in a program. The second column shows how they're
# represented in the VM after being compiled. The third column
# explains their effects.
#
# Program VM insn Effect (stack grows to the right)
# ------- ------------ ---------------------------------
# 417 ('#', 417) push the integer on the stack
# `:-)` ('`', ':-)') write the string to the output
#
# + ('+', None) [___, x, y] --> [___, x+y]
# - ('-', None) [___, x, y] --> [___, x-y]
# * ('*', None) [___, x, y] --> [___, x*y]
# / ('/', None) [___, x, y] --> [___, x//y]
# % ('%', None) [___, x, y] --> [___, x%y]
# & ('&', None) [___, x, y] --> [___, x&y] # bitwise and
# | ('|', None) [___, x, y] --> [___, x|y] # bitwise or
# ^ ('^', None) [___, x, y] --> [___, x^y] # bitwise xor
# ~ ('~', None) [___, x] --> [___, ~x] # bitwise not
# @ ('@', None) [___, x] --> [___, abs(x)]
# ! ('!', None) [___, x] --> [___, int(x == 0)]
# D ('D', None) [___, x] --> [___, x - 1] # [D]ecrement
# I ('I', None) [___, x] --> [___, x + 1] # [I]ncrement
#
# C ('C', None) [___, x] --> [___, x, x] # [C]opy
# X ('X', None) [___, x, y] --> [___, y, x] # e[X]change
# Z ('Z', None) [___, x] --> [___] # [Z]ap
#
# : name --- defines label name (maps to index of next insn)
# J name ('J', index) [J]ump to instruction at index
# E name ('E', index) pop stack; jump to index if [E]qual to zero
# G name ('G', index) pop stack; jump to index if [G]reater than zero
# L name ('L', index) pop stack; jump to index if [L]ess than zero
# N name ('N', index) pop stack; jump to index if [N]ot equal to zero
#
# P name ('P', name) [P]ush variable on stack (or zero if not defined)
# S name ('S', name) pop stack, [S]tore into var (create if doesn't exist)
#
# A ('A', None) pop stack; write chr with that [A]SCII code to output
# R ('R', None) [R]ead next number from input list; push on stack
# W ('W', None) pop stack; [W]rite number (as string) to output
import re
lexer_rules = [
# These regexes are tried in order, from first to last, and the
# first matching regex is used. If two different regexes might match
# the same text, put the preferred regex first (usually the one matching
# the longest token). The function for a regex takes the string s
# that matched the regex and returns the corresponding token. If the
# result is None, the lexer ignores it and finds the next token.
(re.compile(r'\s+'), lambda s: None),
(re.compile(r'[ACDEGIJLNPRSWXZ]'), lambda s: s),
(re.compile(r'[a-z]+'), lambda s: s),
(re.compile(r'[0-9]+'), lambda s: int(s)),
(re.compile(r'[-+*/%&|^~:!@]'), lambda s: s),
(re.compile(r'`[^`]*`'), lambda s: s),
(re.compile(r'.'), lambda s: '#?' + str(ord(s))),
]
def make_lexer(text, pos=0):
"""
Generates a sequence of tokens from the given string starting at
the given position, using lexer_rules, and yielding None to
indicate the end of the string.
"""
while pos < len(text):
for rule in lexer_rules:
regex, make_token = rule
match = regex.match(text, pos)
if match:
pos = match.end()
token = make_token(match.group())
if token is not None:
yield token
break
yield None
class CompileError(Exception):
def __init__(self, message):
self.message = message
def check_name(opcode, token):
" Ensure that this token, following opcode, is a name. "
if token is None or not token.islower():
raise CompileError(opcode + ' must be followed by a name')
return token
def compile(program):
"""
Convert the program into a list of VM instructions. For
uniformity, every VM instruction is a pair containing a
one-character opcode and an argument; see the table above. The
compiler detects undefined labels and duplicate labels. Labels
don't appear as VM instructions, and jump targets are converted
from labels to the index of the corresponding instruction in the
VM list.
"""
vm = []
labels = {}
lexer = make_lexer(program)
token = next(lexer)
while token is not None:
if isinstance(token, int):
vm.append(('#', token))
elif token == ':':
name = check_name(':', next(lexer))
if name in labels:
raise CompileError('duplicate label ' + name)
else:
labels[name] = len(vm)
elif token in 'EGJLN':
opcode = token
name = check_name(opcode, next(lexer))
vm.append((opcode, labels.get(name, name)))
elif token in 'PS':
opcode = token
name = check_name(opcode, next(lexer))
vm.append((opcode, name))
elif token[0] == '`':
vm.append(('`', token[1:-1]))
elif token.islower():
raise CompileError('name ' + token + ' has no preceding opcode')
elif len(token) == 1:
vm.append((token, None))
else:
raise CompileError('unrecognized opcode ' + token)
token = next(lexer)
# backpatch: fill in addresses for forward jumps
for i in range(len(vm)):
opcode, arg = vm[i]
if opcode in 'EGJLN' and isinstance(arg, str):
if arg in labels:
vm[i] = (opcode, labels[arg])
else:
raise CompileError('undefined label ' + arg)
return vm
def runvm(vm, rinputs):
"""
Runs the vm, starting at instruction 0, using rinputs as the
reversed list of inputs. In other words, the last input in the
list is the first to be read, so you can use rinputs.pop(), a
constant-time operation, to get the next input for an R opcode.
"""
stack = [] # runtime stack of integers; grows to the right
output = [] # strings that will be joined to form output
vars = {} # dictionary mapping varnames to integer values
ip = 0 # instruction pointer: index of next VM insn
while 0 <= ip < len(vm):
opcode, arg = vm[ip]
if opcode == '#':
stack.append(arg)
elif opcode == '`':
output.append(arg)
elif opcode == '+':
y = stack.pop()
x = stack.pop()
stack.append(x + y)
elif opcode == '-':
y = stack.pop()
x = stack.pop()
stack.append(x - y)
elif opcode == '*':
y = stack.pop()
x = stack.pop()
stack.append(x * y)
elif opcode == '/':
y = stack.pop()
x = stack.pop()
stack.append(x//y)
elif opcode == '%':
y = stack.pop()
x = stack.pop()
stack.append(x%y)
elif opcode == '&':
y = stack.pop()
x = stack.pop()
stack.append(x&y)
elif opcode == '|':
y = stack.pop()
x = stack.pop()
stack.append(x|y)
elif opcode == '^':
y = stack.pop()
x = stack.pop()
stack.append(x^y)
elif opcode == '~':
x = stack.pop()
stack.append(~x)
elif opcode == '@':
x = stack.pop()
stack.append(abs(x))
elif opcode == '!':
x = stack.pop()
stack.append(int(x==0))
elif opcode == 'D':
x = stack.pop()
stack.append(x-1)
elif opcode == 'I':
x = stack.pop()
stack.append(x+1)
elif opcode == 'C':
x = stack.pop()
stack.append(x)
stack.append(x)
elif opcode == 'X':
y = stack.pop()
x = stack.pop()
stack.append(y)
stack.append(x)
elif opcode == 'Z':
stack.pop()
elif opcode == 'J':
ip = arg - 1 # ip is incremented at the bottom of the loop
elif opcode == 'E':
number = stack.pop()
if number == 0:
ip = arg - 1
elif opcode == 'G':
number = stack.pop()
if number > 0:
ip = arg - 1
elif opcode == 'L':
number = stack.pop()
if number < 0:
ip = arg - 1
elif opcode == 'N':
number = stack.pop()
if number != 0:
ip = arg - 1
elif opcode == 'P':
if arg not in vars:
stack.append(0)
else:
stack.append(vars[arg])
elif opcode == 'S':
number = stack.pop()
vars[arg] = number
elif opcode == 'A':
code = stack.pop()
output.append(chr(code))
elif opcode == 'R':
stack.append(rinputs.pop())
elif opcode == 'W':
number = stack.pop()
output.append(str(number))
else:
raise Exception('unknown opcode ' + opcode)
ip += 1
return ''.join(output)
def run(program, inputs=None):
"""
Compiles the program into a list of VM instructions, reverses the
list of inputs (so that they can be efficiently .pop()'ed when
[R]ead), then runs the VM. The result is the output string.
>>> run('`2 + 2 is `2C+W33A')
'2 + 2 is 4!'
>>> run('2 3 + 4 *')
''
>>> run('2 3 + 4 * W')
'20'
>>> run('9:aCLqC97+ADJa:q')
'jihgfedcba'
>>> run('1 RCSaW `^` RCW ` = ` :loop CEendDXPa*X Jloop :end ZW', [3, 4])
'3^4 = 81'
>>> [run('RC!W`,`@W', [n - 4]) for n in range(9)]
['0,4', '0,3', '0,2', '0,1', '1,0', '0,1', '0,2', '0,3', '0,4']
>>> [run('RCEb0CISbSa:aDCEbPbCPa+SbSaJa:bPbW', [n]) for n in range(13)]
['0', '1', '1', '2', '3', '5', '8', '13', '21', '34', '55', '89', '144']
>>> [run('0RSn:tPn2%48+Pn2/CSnEgJt:gACExJg:x', [n]) for n in range(11)]
['0', '1', '10', '11', '100', '101', '110', '111', '1000', '1001', '1010']
>>> run('0RSn:tPn2%48+Pn2/CSnEgJt:gACExJg:x', [2000654321])
'1110111001111111000111111110001'
"""
rinputs = [] if inputs is None else list(reversed(inputs))
try:
output = runvm(compile(program), rinputs)
except CompileError as e:
print('Error:', e.message)
return ''
return output
if __name__ == '__main__':
import doctest
doctest.testmod()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment