Skip to content

Instantly share code, notes, and snippets.

@skeeto
Created May 10, 2024 22:39
Show Gist options
  • Save skeeto/230df9162352ab686c4f788bffd2f5dd to your computer and use it in GitHub Desktop.
Save skeeto/230df9162352ab686c4f788bffd2f5dd to your computer and use it in GitHub Desktop.
Mini lisp-like interpreter
// Mini lisp-like interpreter
// $ cc -o lisp lisp.c
// $ ./lisp "(cons (+ 1 2 3 (* -40 million)) (cdr (quote (1 2 3))))"
// (-39999994 2 3)
// This is free and unencumbered software released into the public domain.
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#define assert(c) while (!(c)) *(volatile int *)0 = 0
#define new(a, t, n) (t *)alloc(a, sizeof(t), _Alignof(t), n)
#define S(s) (str){s, sizeof(s)-1}
typedef struct {
char *beg;
char *end;
} arena;
static char *alloc(arena *a, ptrdiff_t size, ptrdiff_t align, ptrdiff_t count)
{
ptrdiff_t pad = (uintptr_t)a->end & (align - 1);
assert(count < (a->end - a->beg - pad)/size);
return memset(a->end -= size*count + pad, 0, size*count);
}
typedef struct {
char *data;
ptrdiff_t len;
} str;
static str dup(str s, arena *perm)
{
str r = s;
r.data = new(perm, char, s.len+1);
memcpy(r.data, s.data, s.len);
return r;
}
static _Bool equals(str a, str b)
{
return a.len==b.len && (!a.len || !memcmp(a.data, b.data, a.len));
}
static uint64_t hash(str s)
{
uint64_t r = 0x100;
for (ptrdiff_t i = 0; i < s.len; i++) {
r ^= s.data[i] & 255;
r *= 1111111111111111111u;
}
return r;
}
static str import(char *s)
{
str r = {0};
r.data = s;
for (; s[r.len]; r.len++) {}
return r;
}
static str slice(str s, ptrdiff_t beg, ptrdiff_t end)
{
return (str){s.data+beg, end-beg};
}
static str skipspace(str s)
{
for (; s.len && *s.data<=' '; s = slice(s, 1, s.len)) {}
return s;
}
static int64_t parseint(str s)
{
int64_t r = 0;
int64_t sign = +1;
assert(s.len);
switch (*s.data) {
case '-': sign = -1; s = slice(s, 1, s.len); break;
case '+': sign = +1; s = slice(s, 1, s.len); break;
}
for (ptrdiff_t i = 0; i < s.len; i++) {
r = (uint64_t)r*10 + (s.data[i] - '0');
}
return (uint64_t)r * sign;
}
typedef enum {
TOKEN_EOF,
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_SYMBOL,
TOKEN_INTEGER,
} tokentype;
typedef struct {
str head;
str tail;
tokentype type;
} token;
static token next(str s)
{
token r = {0};
s = skipspace(s);
if (!s.len) return r;
ptrdiff_t len = 1;
switch (s.data[0]) {
case '(':
r.type = TOKEN_LPAREN;
break;
case ')':
r.type = TOKEN_RPAREN;
break;
case '-': case '+':
if (s.len<2 || s.data[1]<'0' || s.data[1]>'9') goto symbol;
// fallthrough
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
r.type = TOKEN_INTEGER;
for (; len<s.len && s.data[len]>='0' && s.data[len]<='9'; len++) {}
break;
default:
symbol:
r.type = TOKEN_SYMBOL;
for (; len<s.len && s.data[len]>' '
&& s.data[len]!='('
&& s.data[len]!=')'; len++) {}
}
r.head = slice(s, 0, len);
r.tail = slice(s, len, s.len);
return r;
}
typedef enum {
VALUE_INTEGER,
VALUE_SYMBOL,
VALUE_CONS,
VALUE_PROC,
} valuetype;
typedef struct value value;
typedef struct {
value *car;
value *cdr;
} cons;
typedef struct {
str name;
value *value;
} symbol;
typedef struct strtab strtab;
typedef value *(*proc)(value *args, strtab **st, arena *perm);
struct value {
union {
int64_t integer;
symbol symbol;
cons cons;
proc proc;
};
valuetype type;
};
static value *newinteger(int64_t x, arena *perm)
{
value *r = new(perm, value, 1);
r->integer = x;
r->type = VALUE_INTEGER;
return r;
}
struct strtab {
strtab *child[4];
value key;
};
static value *intern(strtab **t, str name, arena *perm)
{
for (uint64_t h = hash(name); *t; h <<= 2) {
if (equals(name, (*t)->key.symbol.name)) {
return &(*t)->key;
}
t = &(*t)->child[h>>62];
}
*t = new(perm, strtab, 1);
(*t)->key.symbol.name = dup(name, perm);
(*t)->key.type = VALUE_SYMBOL;
return &(*t)->key;
}
static value *newcons(value *car, value *cdr, arena *perm)
{
value *r = new(perm, value, 1);
r->type = VALUE_CONS;
r->cons.car = car;
r->cons.cdr = cdr;
return r;
}
static value *newproc(proc p, arena *perm)
{
value *r = new(perm, value, 1);
r->proc = p;
r->type = VALUE_PROC;
return r;
}
typedef struct {
value *value;
str tail;
_Bool eof;
} parsed;
static parsed parse(str s, strtab **st, arena *perm)
{
parsed r = {0};
token t = {0};
t.tail = s;
for (;;) {
t = next(t.tail);
switch (t.type) {
case TOKEN_EOF:
r.eof = 1;
return r;
case TOKEN_LPAREN:;
value *nil = intern(st, S("nil"), perm);
value *head = nil;
value **tail = &head;
for (;;) {
parsed n = parse(t.tail, st, perm);
t.tail = n.tail;
if (n.eof) return r; // error
if (!n.value) break; // rparen
*tail = newcons(n.value, nil, perm);
tail = &(*tail)->cons.cdr;
}
r.value = head ? head : nil;
r.tail = t.tail;
return r;
case TOKEN_RPAREN:
r.tail = t.tail;
return r;
case TOKEN_SYMBOL:
r.value = intern(st, t.head, perm);
r.tail = t.tail;
return r;
case TOKEN_INTEGER:
r.value = newinteger(parseint(t.head), perm);
r.tail = t.tail;
return r;
}
}
}
static void print(value *v, strtab **st, arena *perm)
{
switch (v->type) {
case VALUE_INTEGER:
printf("%lld", (long long)v->integer);
break;
case VALUE_SYMBOL:
fwrite(v->symbol.name.data, v->symbol.name.len, 1, stdout);
break;
case VALUE_CONS:
putchar('(');
value *nil = intern(st, S("nil"), perm);
for (;;) {
print(v->cons.car, st, perm);
v = v->cons.cdr;
if (v == nil) break;
if (v->type != VALUE_CONS) {
fputs(" . ", stdout);
print(v, st, perm);
break;
}
putchar(' ');
}
putchar(')');
break;
case VALUE_PROC:
puts("<proc>");
break;
}
}
static value *eval(value *args, strtab **st, arena *perm)
{
value *nil = intern(st, S("nil"), perm);
switch (args->type) {
case VALUE_CONS:;
value *v = args->cons.car;
switch (v->type) {
case VALUE_CONS:
case VALUE_INTEGER:
return nil; // error
case VALUE_SYMBOL:
if (v == intern(st, S("quote"), perm)) {
if (args->cons.cdr->type != VALUE_CONS) {
return nil; // error
}
return args->cons.cdr->cons.car;
}
value *proc = v->symbol.value;
if (!proc || proc->type != VALUE_PROC) {
return nil; // error
}
value *head = nil;
value **tail = &head;
v = args->cons.cdr;
for (; v->type == VALUE_CONS; v = v->cons.cdr) {
value *arg = eval(v->cons.car, st, perm);
*tail = newcons(arg, nil, perm);
tail = &(*tail)->cons.cdr;
}
return proc->proc(head, st, perm);
case VALUE_PROC:
return v;
}
break;
case VALUE_SYMBOL:
if (!args->symbol.value) {
return nil; // error: unbound
}
return args->symbol.value;
case VALUE_INTEGER:
return args;
case VALUE_PROC:
return args;
}
assert(0);
return 0;
}
static value *proc_add(value *args, strtab **st, arena *perm)
{
value *r = newinteger(0, perm);
for (; args->type == VALUE_CONS; args = args->cons.cdr) {
if (args->cons.car->type == VALUE_INTEGER) {
r->integer += (uint64_t)args->cons.car->integer;
}
}
return r;
}
static value *proc_mul(value *args, strtab **st, arena *perm)
{
value *r = newinteger(1, perm);
for (; args->type == VALUE_CONS; args = args->cons.cdr) {
if (args->cons.car->type == VALUE_INTEGER) {
r->integer *= (uint64_t)args->cons.car->integer;
}
}
return r;
}
static value *proc_car(value *args, strtab **st, arena *perm)
{
if (args->cons.car->type == VALUE_CONS) {
return args->cons.car->cons.car;
}
return intern(st, S("nil"), perm); // error
}
static value *proc_cdr(value *args, strtab **st, arena *perm)
{
if (args->cons.car->type == VALUE_CONS) {
return args->cons.car->cons.cdr;
}
return intern(st, S("nil"), perm); // error
}
static value *proc_cons(value *args, strtab **st, arena *perm)
{
value *car = args->cons.car;
value *cdr = args->cons.cdr;
if (cdr->type != VALUE_CONS) {
intern(st, S("nil"), perm); // error
}
return newcons(car, cdr->cons.car, perm);
}
static void define(strtab **st, str name, value *v, arena *perm)
{
intern(st, name, perm)->symbol.value = v;
}
int main(int argc, char **argv)
{
static char heap[1<<24]; // NOTE: technically strict aliasing issue
arena scratch = {heap, heap+sizeof(heap)};
for (int i = 1; i < argc; i++) {
// Each argv is evaluated in a fresh context, freed at the end
// of the loop iteraiton.
arena temp = scratch;
strtab *st = 0;
define(&st, S("+"), newproc(proc_add, &temp), &temp);
define(&st, S("*"), newproc(proc_mul, &temp), &temp);
define(&st, S("car"), newproc(proc_car, &temp), &temp);
define(&st, S("cdr"), newproc(proc_cdr, &temp), &temp);
define(&st, S("cons"), newproc(proc_cons, &temp), &temp);
define(&st, S("million"), newinteger(1000000, &temp), &temp);
value *v = parse(import(argv[i]), &st, &temp).value;
if (!v) {
puts("ERROR: invalid input");
continue;
}
value *r = eval(v, &st, &temp);
print(r, &st, &temp);
putchar('\n');
}
fflush(stdout);
return ferror(stderr);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment