Last active
July 1, 2023 21:33
-
-
Save coinconclusive/969a570befebe39c525de463291d827f to your computer and use it in GitHub Desktop.
simple interpreter in C with a mark and sweep GC. wip.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Copyright © 2023 Anne Redko */ | |
/* Under the MIT License. https://mit-license.org/ */ | |
#include <assert.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <ctype.h> | |
#include <stdint.h> | |
#include <stddef.h> | |
#include <stdarg.h> | |
#define UNLIKELY(X) __builtin_expect((X), 0) | |
#define LIKELY(X) __builtin_expect((X), 1) | |
#define UNREACHABLE() __builtin_unreachable(); | |
#define DEBUG | |
#ifdef DEBUG | |
#define FAIL() (void)(*(volatile int*)0 = 0) | |
#define ASSERT(e) (LIKELY(e) ? (void)0 : FAIL()) | |
#else | |
#define FAIL() abort() | |
#define ASSERT(e) (void)(e); | |
#endif | |
#ifdef VERBOSE | |
#define SHOULD_PRINT_GC_SWEEPS 1 | |
#else | |
#define SHOULD_PRINT_GC_SWEEPS 0 | |
#endif | |
static inline const char *offset_nullable_ptr_safe(const char *ptr, size_t offset) { | |
return ptr == NULL ? ptr : ptr + offset; | |
} | |
enum ast_type { | |
AST_INT, | |
AST_VAR, | |
AST_STR, | |
AST_BIN, | |
AST_INV, | |
AST_FUN, | |
AST_BLK, | |
AST_LET, | |
AST_RET, | |
AST_TRN, | |
AST_SET, | |
AST_UNR, | |
AST_ARR, | |
}; | |
enum ast_bin_type { | |
AST_BIN_ADD, | |
AST_BIN_SUB, | |
AST_BIN_MUL, | |
AST_BIN_DIV, | |
AST_BIN_AND, | |
AST_BIN_BAND, | |
AST_BIN_OR, | |
AST_BIN_BOR, | |
AST_BIN_XOR, | |
AST_BIN_SHR, | |
AST_BIN_SHL, | |
AST_BIN_MOD, | |
AST_BIN_EQL, | |
AST_BIN_NEQ, | |
AST_BIN_GEQ, | |
AST_BIN_LEQ, | |
AST_BIN_LTH, | |
AST_BIN_GTH, | |
}; | |
const char *ast_bin_type_to_string(enum ast_bin_type type) { | |
static const char *names[] = { | |
"add", | |
"substract", | |
"multiply", | |
"divide", | |
"and", | |
"binary and", | |
"or", | |
"binary or", | |
"xor", | |
"shift right", | |
"shift left", | |
"modulo", | |
"equals", | |
"not equals", | |
"greater or equals", | |
"less or equals", | |
"less than", | |
"greater than", | |
}; | |
return names[type]; | |
} | |
enum ast_unr_type { | |
AST_UNR_NOT, | |
AST_UNR_NEG, | |
AST_UNR_BNEG, | |
}; | |
const char *ast_unr_type_to_string(enum ast_unr_type type) { | |
static const char *names[] = { | |
"boolean not", | |
"negate", | |
"binary negate", | |
}; | |
return names[type]; | |
} | |
#define MEMALLOC(T) ((T*)calloc(1, sizeof(T))) | |
struct where { const char *fname; uint32_t line, col; } __attribute__((packed)); | |
struct ast_base { enum ast_type type; struct where where; }; | |
struct ast_var { struct ast_base base; char *name; }; | |
struct ast_int { struct ast_base base; intmax_t value; }; | |
struct ast_str { struct ast_base base; char *value; }; | |
struct ast_bin { struct ast_base base; enum ast_bin_type type; struct ast_base *lhs, *rhs; }; | |
struct ast_inv { struct ast_base base; struct ast_base *fun; size_t arg_count; struct ast_base **args; }; | |
struct ast_fun { struct ast_base base; struct ast_base *body; uint32_t arg_count, min_arg_count; char **args; }; | |
struct ast_blk { struct ast_base base; size_t stmt_count; struct ast_base **stmts; }; | |
struct ast_let { struct ast_base base; char *name; struct ast_base *value; }; | |
struct ast_trn { struct ast_base base; struct ast_base *cond, *then, *other; }; | |
struct ast_set { struct ast_base base; char *name; struct ast_base *value; }; | |
struct ast_ret { struct ast_base base; struct ast_base *value; int error; }; | |
struct ast_unr { struct ast_base base; enum ast_unr_type type; struct ast_base *opr; }; | |
struct ast_arr { struct ast_base base; size_t elem_count; struct ast_base **elems; }; | |
struct ast_var *ast_var_new(const char *name, const struct where *where) { | |
struct ast_var *n = MEMALLOC(struct ast_var); | |
n->base.type = AST_VAR; | |
n->base.where = *where; | |
n->name = malloc(strlen(name) + 1); | |
memcpy(n->name, name, strlen(name) + 1); | |
return n; | |
} | |
struct ast_int *ast_int_new(int value, const struct where *where) { | |
struct ast_int *n = MEMALLOC(struct ast_int); | |
n->base.type = AST_INT; | |
n->base.where = *where; | |
n->value = value; | |
return n; | |
} | |
struct ast_str *ast_str_new(const char *value, const struct where *where) { | |
struct ast_str *n = MEMALLOC(struct ast_str); | |
n->base.type = AST_STR; | |
n->base.where = *where; | |
n->value = malloc(strlen(value) + 1); | |
memcpy(n->value, value, strlen(value) + 1); | |
return n; | |
} | |
struct ast_inv *ast_inv_new(struct ast_base *fun, size_t arg_count, struct ast_base **args, const struct where *where) { | |
struct ast_inv *n = MEMALLOC(struct ast_inv); | |
n->base.type = AST_INV; | |
n->base.where = *where; | |
n->fun = fun; | |
n->arg_count = arg_count; | |
if (n->arg_count == 0) n->args = NULL; | |
else { | |
n->args = calloc(n->arg_count, sizeof(struct ast_base*)); | |
for (size_t i = 0; i < n->arg_count; ++i) { | |
n->args[i] = args[i]; | |
} | |
} | |
return n; | |
} | |
struct ast_blk *ast_blk_new(size_t stmt_count, struct ast_base **stmts, const struct where *where) { | |
struct ast_blk *n = MEMALLOC(struct ast_blk); | |
n->base.type = AST_BLK; | |
n->base.where = *where; | |
n->stmt_count = stmt_count; | |
if (n->stmt_count == 0) n->stmts = NULL; | |
else { | |
n->stmts = calloc(n->stmt_count, sizeof(struct ast_base*)); | |
for (size_t i = 0; i < n->stmt_count; ++i) { | |
n->stmts[i] = stmts[i]; | |
} | |
} | |
return n; | |
} | |
struct ast_let *ast_let_new(const char *name, struct ast_base *value, const struct where *where) { | |
struct ast_let *n = MEMALLOC(struct ast_let); | |
n->base.type = AST_LET; | |
n->base.where = *where; | |
n->name = malloc(strlen(name) + 1); | |
memcpy(n->name, name, strlen(name) + 1); | |
n->value = value; | |
return n; | |
} | |
struct ast_ret *ast_ret_new(struct ast_base *value, int error, const struct where *where) { | |
struct ast_ret *n = MEMALLOC(struct ast_ret); | |
n->base.type = AST_RET; | |
n->base.where = *where; | |
n->value = value; | |
n->error = error; | |
return n; | |
} | |
struct ast_unr *ast_unr_new(enum ast_unr_type type, struct ast_base *opr, const struct where *where) { | |
struct ast_unr *n = MEMALLOC(struct ast_unr); | |
n->base.type = AST_UNR; | |
n->base.where = *where; | |
n->type = type; | |
n->opr = opr; | |
return n; | |
} | |
struct ast_set *ast_set_new(const char *name, struct ast_base *value, const struct where *where) { | |
struct ast_set *n = MEMALLOC(struct ast_set); | |
n->base.type = AST_SET; | |
n->base.where = *where; | |
n->name = malloc(strlen(name) + 1); | |
memcpy(n->name, name, strlen(name) + 1); | |
n->value = value; | |
return n; | |
} | |
struct ast_trn *ast_trn_new(struct ast_base *cond, struct ast_base *then, struct ast_base *other, const struct where *where) { | |
struct ast_trn *n = MEMALLOC(struct ast_trn); | |
n->base.type = AST_TRN; | |
n->base.where = *where; | |
n->cond = cond; | |
n->then = then; | |
n->other = other; | |
return n; | |
} | |
struct ast_fun *ast_fun_new( | |
struct ast_base *body, | |
uint32_t arg_count, | |
uint32_t min_arg_count, | |
char **args, | |
const struct where *where | |
) { | |
struct ast_fun *n = MEMALLOC(struct ast_fun); | |
n->base.type = AST_FUN; | |
n->base.where = *where; | |
n->body = body; | |
n->arg_count = arg_count; | |
n->min_arg_count = min_arg_count; | |
if (n->arg_count == 0) n->args = NULL; | |
else { | |
n->args = calloc(n->arg_count, sizeof(char*)); | |
for (size_t i = 0; i < n->arg_count; ++i) { | |
n->args[i] = malloc(strlen(args[i]) + 1); | |
memcpy(n->args[i], args[i], strlen(args[i]) + 1); | |
} | |
} | |
return n; | |
} | |
struct ast_bin *ast_bin_new(enum ast_bin_type type, struct ast_base *lhs, struct ast_base *rhs, const struct where *where) { | |
struct ast_bin *n = MEMALLOC(struct ast_bin); | |
n->base.type = AST_BIN; | |
n->base.where = *where; | |
n->type = type; | |
n->lhs = lhs; | |
n->rhs = rhs; | |
return n; | |
} | |
struct ast_arr *ast_arr_new(size_t elem_count, struct ast_base **elems, const struct where *where) { | |
struct ast_arr *n = MEMALLOC(struct ast_arr); | |
n->base.type = AST_ARR; | |
n->base.where = *where; | |
n->elem_count = elem_count; | |
if (n->elem_count == 0) n->elems = NULL; | |
else { | |
n->elems = calloc(n->elem_count, sizeof(struct ast_base*)); | |
for (size_t i = 0; i < n->elem_count; ++i) { | |
n->elems[i] = elems[i]; | |
} | |
} | |
return n; | |
} | |
void print_ast(struct ast_base *n, int indent, FILE *fout) { | |
for (int i = 0; i < indent; ++i) fputs(" ", fout); | |
if (n == NULL) { fprintf(fout, "NULL\n"); return; } | |
switch (n->type) { | |
case AST_INT: fprintf(fout, "int %ld\n", ((struct ast_int*)n)->value); return; | |
case AST_VAR: fprintf(fout, "var '%s'\n", ((struct ast_var*)n)->name); return; | |
case AST_STR: fprintf(fout, "str \"%s\"\n", ((struct ast_str*)n)->value); return; | |
case AST_BIN: | |
fprintf(fout, "bin: %s\n", ast_bin_type_to_string(((struct ast_bin*)n)->type)); | |
print_ast(((struct ast_bin*)n)->lhs, indent + 1, fout); | |
print_ast(((struct ast_bin*)n)->rhs, indent + 1, fout); | |
return; | |
case AST_UNR: | |
fprintf(fout, "unr: %s\n", ast_unr_type_to_string(((struct ast_unr*)n)->type)); | |
print_ast(((struct ast_unr*)n)->opr, indent + 1, fout); | |
return; | |
case AST_INV: fprintf(fout, "inv:\n"); | |
print_ast(((struct ast_inv*)n)->fun, indent + 1, fout); | |
for (size_t i = 0; i < ((struct ast_inv*)n)->arg_count; ++i) | |
print_ast(((struct ast_inv*)n)->args[i], indent + 1, fout); | |
return; | |
case AST_FUN: fprintf(fout, "fun: |"); | |
for (size_t i = 0; i < ((struct ast_fun*)n)->arg_count; ++i) { | |
fprintf(fout, "%s", ((struct ast_fun*)n)->args[i]); | |
if (i != ((struct ast_fun*)n)->arg_count - 1) fprintf(fout, ", "); | |
} | |
fprintf(fout, "|\n"); | |
print_ast(((struct ast_fun*)n)->body, indent + 1, fout); | |
return; | |
case AST_SET: fprintf(fout, "set: %s =\n", ((struct ast_set*)n)->name); | |
print_ast(((struct ast_set*)n)->value, indent + 1, fout); | |
return; | |
case AST_LET: fprintf(fout, "let: %s :=\n", ((struct ast_set*)n)->name); | |
print_ast(((struct ast_set*)n)->value, indent + 1, fout); | |
return; | |
case AST_RET: fprintf(fout, "ret%s:\n", ((struct ast_ret*)n)->error ? " (error)" : ""); | |
print_ast(((struct ast_ret*)n)->value, indent + 1, fout); | |
return; | |
case AST_TRN: fprintf(fout, "trn:\n"); | |
print_ast(((struct ast_trn*)n)->cond, indent + 1, fout); | |
print_ast(((struct ast_trn*)n)->then, indent + 1, fout); | |
print_ast(((struct ast_trn*)n)->other, indent + 1, fout); | |
return; | |
case AST_BLK: fprintf(fout, "blk:\n"); | |
for (size_t i = 0; i < ((struct ast_blk*)n)->stmt_count; ++i) { | |
print_ast(((struct ast_blk*)n)->stmts[i], indent + 1, fout); | |
} | |
return; | |
case AST_ARR: fprintf(fout, "arr:\n"); | |
for (size_t i = 0; i < ((struct ast_arr*)n)->elem_count; ++i) { | |
print_ast(((struct ast_arr*)n)->elems[i], indent + 1, fout); | |
} | |
return; | |
default: fprintf(stderr, "print: unknown ast type: %d!\n", n->type); return; | |
} | |
} | |
struct ast_base *copy_ast(struct ast_base *n) { | |
if (n == NULL) { fprintf(stderr, "copy_ast(NULL)!\n"); FAIL(); return NULL; } | |
switch (n->type) { | |
case AST_INT: return &ast_int_new(((struct ast_int*)n)->value, &n->where)->base; | |
case AST_VAR: return &ast_var_new(((struct ast_var*)n)->name, &n->where)->base; | |
case AST_STR: return &ast_str_new(((struct ast_str*)n)->value, &n->where)->base; | |
case AST_BIN: | |
return &ast_bin_new(((struct ast_bin*)n)->type, | |
copy_ast(((struct ast_bin*)n)->lhs), | |
copy_ast(((struct ast_bin*)n)->rhs), | |
&n->where | |
)->base; | |
case AST_UNR: | |
return &ast_unr_new(((struct ast_unr*)n)->type, | |
copy_ast(((struct ast_unr*)n)->opr), | |
&n->where | |
)->base; | |
case AST_INV: { | |
struct ast_base **args = calloc(sizeof(struct ast_base*), ((struct ast_inv*)n)->arg_count); | |
for (size_t i = 0; i < ((struct ast_inv*)n)->arg_count; ++i) { | |
args[i] = copy_ast(((struct ast_inv*)n)->args[i]); | |
} | |
struct ast_inv *r = ast_inv_new( | |
copy_ast(((struct ast_inv*)n)->fun), | |
((struct ast_inv*)n)->arg_count, | |
args, | |
&n->where | |
); | |
if (((struct ast_inv*)n)->arg_count > 0) | |
free(args); | |
return &r->base; | |
} | |
case AST_BLK: { | |
struct ast_base **stmts = calloc(sizeof(struct ast_base*), ((struct ast_blk*)n)->stmt_count); | |
for (size_t i = 0; i < ((struct ast_blk*)n)->stmt_count; ++i) { | |
stmts[i] = copy_ast(((struct ast_blk*)n)->stmts[i]); | |
} | |
struct ast_blk *r = ast_blk_new(((struct ast_blk*)n)->stmt_count, stmts, &n->where); | |
if (((struct ast_blk*)n)->stmt_count > 0) free(stmts); | |
return &r->base; | |
} | |
case AST_TRN: | |
return &ast_trn_new( | |
copy_ast(((struct ast_trn*)n)->cond), | |
copy_ast(((struct ast_trn*)n)->then), | |
((struct ast_trn*)n)->other == NULL ? NULL : copy_ast(((struct ast_trn*)n)->other), | |
&n->where | |
)->base; | |
case AST_FUN: | |
return &ast_fun_new( | |
copy_ast(((struct ast_fun*)n)->body), | |
((struct ast_fun*)n)->arg_count, | |
((struct ast_fun*)n)->min_arg_count, | |
((struct ast_fun*)n)->args, | |
&n->where | |
)->base; | |
case AST_LET: | |
return &ast_let_new( | |
((struct ast_let*)n)->name, | |
copy_ast(((struct ast_let*)n)->value), | |
&n->where | |
)->base; | |
case AST_SET: | |
return &ast_set_new( | |
((struct ast_set*)n)->name, | |
copy_ast(((struct ast_set*)n)->value), | |
&n->where | |
)->base; | |
case AST_RET: | |
return &ast_ret_new( | |
copy_ast(((struct ast_ret*)n)->value), | |
((struct ast_ret*)n)->error, | |
&n->where | |
)->base; | |
case AST_ARR: { | |
struct ast_base **elems = NULL; | |
if (((struct ast_arr*)n)->elem_count > 0) | |
elems = calloc(sizeof(struct ast_base*), ((struct ast_arr*)n)->elem_count); | |
for (size_t i = 0; i < ((struct ast_arr*)n)->elem_count; ++i) { | |
elems[i] = copy_ast(((struct ast_arr*)n)->elems[i]); | |
} | |
struct ast_arr *r = ast_arr_new(((struct ast_arr*)n)->elem_count, elems, &n->where); | |
if (((struct ast_arr*)n)->elem_count > 0) free(elems); | |
return &r->base; | |
} | |
default: | |
fprintf(stderr, "copy: unknown ast type: %d!\n", n->type); | |
return NULL; | |
} | |
} | |
void free_ast(struct ast_base *n) { | |
if (n == NULL) { fprintf(stderr, "free_ast(NULL)!\n"); return; } | |
switch (n->type) { | |
case AST_INT: break; | |
case AST_VAR: free(((struct ast_var*)n)->name); break; | |
case AST_STR: free(((struct ast_str*)n)->value); break; | |
case AST_BIN: | |
free_ast(((struct ast_bin*)n)->lhs); | |
free_ast(((struct ast_bin*)n)->rhs); | |
break; | |
case AST_UNR: | |
free_ast(((struct ast_unr*)n)->opr); | |
break; | |
case AST_INV: | |
free_ast(((struct ast_inv*)n)->fun); | |
for (size_t i = 0; i < ((struct ast_inv*)n)->arg_count; ++i) { | |
free_ast(((struct ast_inv*)n)->args[i]); | |
((struct ast_inv*)n)->args[i] = NULL; | |
} | |
if (((struct ast_inv*)n)->arg_count > 0) | |
free(((struct ast_inv*)n)->args); | |
break; | |
case AST_BLK: | |
for (size_t i = 0; i < ((struct ast_blk*)n)->stmt_count; ++i) { | |
free_ast(((struct ast_blk*)n)->stmts[i]); | |
((struct ast_blk*)n)->stmts[i] = NULL; | |
} | |
if (((struct ast_blk*)n)->stmt_count > 0) | |
free(((struct ast_blk*)n)->stmts); | |
break; | |
case AST_TRN: | |
free_ast(((struct ast_trn*)n)->cond); | |
free_ast(((struct ast_trn*)n)->then); | |
if (((struct ast_trn*)n)->other != NULL) | |
free_ast(((struct ast_trn*)n)->other); | |
break; | |
case AST_FUN: | |
free_ast(((struct ast_fun*)n)->body); | |
for (size_t i = 0; i < ((struct ast_fun*)n)->arg_count; ++i) { | |
free(((struct ast_fun*)n)->args[i]); | |
((struct ast_fun*)n)->args[i] = NULL; | |
} | |
if (((struct ast_fun*)n)->arg_count > 0) | |
free(((struct ast_fun*)n)->args); | |
break; | |
case AST_LET: free(((struct ast_let*)n)->name); free_ast(((struct ast_let*)n)->value); break; | |
case AST_SET: free(((struct ast_set*)n)->name); free_ast(((struct ast_set*)n)->value); break; | |
case AST_RET: free_ast(((struct ast_ret*)n)->value); break; | |
case AST_ARR: | |
for (size_t i = 0; i < ((struct ast_arr*)n)->elem_count; ++i) { | |
free_ast(((struct ast_arr*)n)->elems[i]); | |
((struct ast_arr*)n)->elems[i] = NULL; | |
} | |
if (((struct ast_arr*)n)->elem_count > 0) | |
free(((struct ast_arr*)n)->elems); | |
break; | |
default: fprintf(stderr, "free: unknown ast type: %d!\n", n->type); break; | |
} | |
free(n); | |
} | |
enum token_type { | |
TOKEN_EOF, | |
TOKEN_VAR, | |
TOKEN_INT, | |
TOKEN_STR, | |
TOKEN_ARR, // => | |
TOKEN_EAR, // =!> | |
TOKEN_EQL, // == | |
TOKEN_NEQ, // != | |
TOKEN_GEQ, // >= | |
TOKEN_LEQ, // <= | |
TOKEN_SHR, // >> | |
TOKEN_SHL, // << | |
TOKEN_AND, // && | |
TOKEN_OR, // || | |
TOKEN_ADDEQ, // += | |
TOKEN_SUBEQ, // -= | |
TOKEN_MULEQ, // *= | |
TOKEN_DIVEQ, // /= | |
TOKEN_MODEQ, // %= | |
TOKEN_SHREQ, // >>= | |
TOKEN_SHLEQ, // <<= | |
TOKEN_OREQ, // ||= | |
TOKEN_BOREQ, // |= | |
TOKEN_ANDEQ, // &&= | |
TOKEN_BANDEQ, // &= | |
TOKEN_OPEQ_FIRST_ = TOKEN_ADDEQ, | |
TOKEN_OPEQ_LAST_ = TOKEN_BANDEQ, | |
}; | |
const char *token_type_to_string(enum token_type type) { | |
static const char *names[] = { | |
"end of input", | |
"variable", | |
"integer", | |
"string", | |
"'=>'", | |
"'=!>'", | |
"'=='", | |
"'!='", | |
"'>='", | |
"'<='", | |
"'>>'", | |
"'<<'", | |
"'&&'", | |
"'||'", | |
"'+='", | |
"'-='", | |
"'*='", | |
"'/='", | |
"'%='", | |
"'>>='", | |
"'<<='", | |
"'||='", | |
"'|='", | |
"'&&='", | |
"'&='", | |
}; | |
return names[type]; | |
} | |
void print_where(struct where where, FILE *fout) { | |
fprintf(fout, "%s:%d:%d", where.fname, where.line + 1, where.col + 1); | |
} | |
struct token { struct where where; char type; size_t value_offset; }; | |
struct lexer { | |
size_t tok_count; | |
struct token *toks; | |
size_t data_size; | |
size_t data_alloced; | |
char *data; | |
}; | |
#define TOKEN_VALUE(LEX, TOK) (offset_nullable_ptr_safe((LEX)->data, (TOK).value_offset)) | |
void print_token(const struct lexer *lex, const struct token *token, FILE *fout) { | |
if (token->type == TOKEN_VAR || token->type == TOKEN_INT || token->type == TOKEN_STR) { | |
fprintf(stdout, "%s: '%s'\n", token_type_to_string(token->type), TOKEN_VALUE(lex, *token)); | |
} else if (token->type < ' ') { | |
fprintf(stdout, "%s\n", token_type_to_string(token->type)); | |
} else { | |
fprintf(stdout, "'%c'\n", token->type); | |
} | |
} | |
void lexer_free(struct lexer *lex) { | |
if (lex->tok_count > 0) free(lex->toks); | |
if (lex->data_alloced > 0) free(lex->data); | |
} | |
void lexer_push(struct lexer *lex, char c) { | |
if (lex->data_size >= lex->data_alloced) { | |
lex->data_alloced += 64; | |
if (lex->data_alloced == 64) lex->data = malloc(64); | |
else lex->data = realloc(lex->data, lex->data_alloced); | |
} | |
lex->data_size += 1; | |
lex->data[lex->data_size - 1] = c; | |
} | |
void lexer_add(struct lexer *lex, char type, size_t value_offset, struct where where) { | |
lex->tok_count += 1; | |
if (lex->tok_count == 1) lex->toks = malloc(sizeof(struct token)); | |
else lex->toks = realloc(lex->toks, sizeof(struct token) * lex->tok_count); | |
lex->toks[lex->tok_count - 1].type = type; | |
lex->toks[lex->tok_count - 1].value_offset = value_offset; | |
lex->toks[lex->tok_count - 1].where = where; | |
} | |
char lexer_eat(struct where *where, FILE *fin) { | |
char c = fgetc(fin); | |
if (!isprint(c) && !isspace(c) && c != EOF) { | |
fprintf(stderr, "illegal character '\\x%02x'!\n", c); | |
exit(1); | |
} | |
if (c == '\n') { | |
where->col = 0; | |
where->line += 1; | |
} else { | |
where->col += 1; | |
} | |
return c; | |
} | |
void lexer_add_from(struct lexer *lex, FILE *fin, const char *fname) { | |
struct where where = { .col = 0, .line = 0, .fname = fname }; | |
char c = lexer_eat(&where, fin); | |
while (c != EOF) { | |
while (isspace(c)) c = lexer_eat(&where, fin); | |
while (c == '#') { | |
while (c != '\n') c = lexer_eat(&where, fin); | |
while (isspace(c)) c = lexer_eat(&where, fin); | |
} | |
struct where where_start = where; | |
where_start.col -= 1; | |
if (c == EOF) break; | |
else if (isalpha(c) || c == '_') { | |
size_t start = lex->data_size; | |
do { | |
lexer_push(lex, c); | |
c = lexer_eat(&where, fin); | |
} while (isalnum(c) || c == '_'); | |
lexer_push(lex, '\0'); | |
lexer_add(lex, TOKEN_VAR, start, where_start); | |
} else if (isdigit(c)) { | |
size_t start = lex->data_size; | |
do { | |
lexer_push(lex, c); | |
c = lexer_eat(&where, fin); | |
} while (isdigit(c) || c == '_'); | |
lexer_push(lex, '\0'); | |
lexer_add(lex, TOKEN_INT, start, where_start); | |
} else if (c == '"') { | |
size_t start = lex->data_size; | |
c = lexer_eat(&where, fin); | |
while (c != '"' && c != EOF) { | |
if (c == '\\') { | |
c = lexer_eat(&where, fin); | |
switch (c) { | |
case 'n': c = '\n'; break; | |
case 't': c = '\t'; break; | |
case 'r': c = '\r'; break; | |
case '0': c = '\0'; break; | |
case 'a': c = '\a'; break; | |
case 'e': c = '\033'; break; | |
case '"': c = '"'; break; | |
case '\'': c = '\''; break; | |
case '\\': c = '\\'; break; | |
case EOF: | |
default: | |
fprintf(stderr, "warning: bad escape sequence: '\\%c'!\n", c); | |
c = EOF; | |
break; | |
} | |
} | |
if (c != EOF) lexer_push(lex, c); | |
c = lexer_eat(&where, fin); | |
} | |
c = lexer_eat(&where, fin); | |
lexer_push(lex, '\0'); | |
lexer_add(lex, TOKEN_STR, start, where_start); | |
} else if (c == '=') { | |
c = lexer_eat(&where, fin); | |
if (c == '=') { | |
c = lexer_eat(&where, fin); | |
lexer_add(lex, TOKEN_EQL, 0, where_start); | |
} else if (c == '>') { | |
c = lexer_eat(&where, fin); | |
lexer_add(lex, TOKEN_ARR, 0, where_start); | |
} else if (c == '!') { | |
c = lexer_eat(&where, fin); | |
if (c == '>') { | |
c = lexer_eat(&where, fin); | |
lexer_add(lex, TOKEN_EAR, 0, where_start); | |
} else { | |
lexer_add(lex, TOKEN_EQL, 0, where_start); | |
lexer_add(lex, '>', 0, where_start); | |
} | |
} else lexer_add(lex, '=', 0, where_start); | |
} else if (c == '!') { | |
c = lexer_eat(&where, fin); | |
if (c == '=') { | |
c = lexer_eat(&where, fin); | |
lexer_add(lex, TOKEN_NEQ, 0, where_start); | |
} else lexer_add(lex, '!', 0, where_start); | |
} | |
#define EQ_TOKEN(X, T) { \ | |
c = lexer_eat(&where, fin); \ | |
if (c == '=') { \ | |
lexer_add(lex, T, 0, where_start); \ | |
c = lexer_eat(&where, fin); \ | |
} else lexer_add(lex, X, 0, where_start); \ | |
} | |
#define EQ_2TOKEN(X, TE, T2, T2E) { \ | |
c = lexer_eat(&where, fin); \ | |
if (c == '=') lexer_add(lex, TE, 0, where_start); \ | |
if (c == X) { \ | |
c = lexer_eat(&where, fin); \ | |
if (c == '=') lexer_add(lex, T2E, 0, where_start); \ | |
else { lexer_add(lex, T2, 0, where_start); } \ | |
} \ | |
else { lexer_add(lex, X, 0, where_start); } \ | |
} | |
else if (c == '+') EQ_TOKEN('+', TOKEN_ADDEQ) | |
else if (c == '-') EQ_TOKEN('-', TOKEN_SUBEQ) | |
else if (c == '*') EQ_TOKEN('*', TOKEN_MULEQ) | |
else if (c == '/') EQ_TOKEN('/', TOKEN_DIVEQ) | |
else if (c == '%') EQ_TOKEN('%', TOKEN_MODEQ) | |
else if (c == '&') EQ_2TOKEN('&', TOKEN_BANDEQ, TOKEN_AND, TOKEN_ANDEQ) | |
else if (c == '|') EQ_2TOKEN('|', TOKEN_BOREQ, TOKEN_OR, TOKEN_OREQ) | |
else if (c == '>') EQ_2TOKEN('>', TOKEN_GEQ, TOKEN_SHR, TOKEN_SHREQ) | |
else if (c == '<') EQ_2TOKEN('<', TOKEN_LEQ, TOKEN_SHL, TOKEN_SHLEQ) | |
#undef EQ_TOKEN | |
#undef EQ_2TOKEN | |
else { | |
lexer_add(lex, c, 0, where_start); | |
c = lexer_eat(&where, fin); | |
} | |
} | |
where.col -= 1; | |
lexer_add(lex, TOKEN_EOF, 0, where); | |
} | |
enum ast_bin_type token_type_opeq_to_ast_bin_type(enum token_type type) { | |
switch (type) { | |
case TOKEN_ADDEQ: return AST_BIN_ADD; | |
case TOKEN_SUBEQ: return AST_BIN_SUB; | |
case TOKEN_MULEQ: return AST_BIN_MUL; | |
case TOKEN_DIVEQ: return AST_BIN_DIV; | |
case TOKEN_MODEQ: return AST_BIN_MOD; | |
case TOKEN_SHREQ: return AST_BIN_SHR; | |
case TOKEN_SHLEQ: return AST_BIN_SHL; | |
case TOKEN_OREQ: return AST_BIN_OR; | |
case TOKEN_BOREQ: return AST_BIN_BOR; | |
case TOKEN_ANDEQ: return AST_BIN_AND; | |
case TOKEN_BANDEQ: return AST_BIN_BAND; | |
default: fprintf(stderr, "tokeneqop->bintype: bad type\n"); exit(1); | |
} | |
} | |
struct err_ast { struct ast_base *ast; const char *error; const struct token *rest; }; | |
struct err_asts { size_t ast_count; struct ast_base **asts; const char *error; const struct token *rest; }; | |
#define ERR_AST_IS_ERR(X) ((X).error != NULL) | |
#define ERR_AST_IS_AST(X) ((X).ast != NULL) | |
struct parser { | |
struct lexer *lex; | |
}; | |
struct err_ast parse_int(struct parser *p, const struct token *in) { | |
if (in->type != TOKEN_INT) return (struct err_ast){ NULL, "expected variable", in }; | |
return (struct err_ast){ &ast_int_new(atoi(TOKEN_VALUE(p->lex, *in)), &in->where)->base, NULL, in + 1 }; | |
} | |
struct err_ast parse_var(struct parser *p, const struct token *in) { | |
if (in->type != TOKEN_VAR) return (struct err_ast){ NULL, "expected variable", in }; | |
struct ast_var *n = ast_var_new(TOKEN_VALUE(p->lex, *in), &in->where); | |
return (struct err_ast){ &n->base, NULL, in + 1 }; | |
} | |
struct err_ast parse_str(struct parser *p, const struct token *in) { | |
if (in->type != TOKEN_STR) return (struct err_ast){ NULL, "expected string", in }; | |
struct ast_str *n = ast_str_new(TOKEN_VALUE(p->lex, *in), &in->where); | |
return (struct err_ast){ &n->base, NULL, in + 1 }; | |
} | |
static inline void safe_eat_token(const struct token **in) { | |
*in = (*in)->type == TOKEN_EOF ? *in : ++*in; | |
} | |
struct err_ast parse_expr(struct parser *p, const struct token *in); | |
struct err_ast parse_stmt(struct parser *p, const struct token *in); | |
struct err_asts parse_stmts(struct parser *p, const struct token *in) { | |
struct err_ast r; | |
r = parse_stmt(p, in); | |
if (ERR_AST_IS_ERR(r)) return (struct err_asts){ 0, NULL, r.error, r.rest }; | |
in = r.rest; | |
struct ast_base **stmts = malloc(sizeof(struct ast_base*)); | |
size_t stmt_count = 1; | |
stmts[0] = r.ast; | |
while (in->type == ';') { | |
safe_eat_token(&in); | |
if (in->type == '}' || in->type == TOKEN_EOF) break; | |
r = parse_stmt(p, in); | |
if (ERR_AST_IS_ERR(r)) { | |
for (size_t i = 0; i < stmt_count; ++i) free_ast(stmts[i]); | |
if (stmt_count > 0) free(stmts); | |
return (struct err_asts){ 0, NULL, r.error, r.rest }; | |
} | |
in = r.rest; | |
stmt_count += 1; | |
stmts = realloc(stmts, sizeof(struct ast_base*) * stmt_count); | |
stmts[stmt_count - 1] = r.ast; | |
} | |
return (struct err_asts){ stmt_count, stmts, NULL, in }; | |
} | |
struct err_ast parse_block(struct parser *p, const struct token *in) { | |
if (in->type != '{') return (struct err_ast){ NULL, "expected '{'", in }; | |
safe_eat_token(&in); | |
size_t stmt_count = 0; | |
struct ast_base **stmts = NULL; | |
if ((in + 1)->type != '}') { | |
struct err_asts r = parse_stmts(p, in); | |
if (ERR_AST_IS_ERR(r)) return (struct err_ast){ NULL, r.error, r.rest }; | |
in = r.rest; | |
stmt_count = r.ast_count; | |
stmts = r.asts; | |
if (in->type != '}') { | |
for (size_t i = 0; i < stmt_count; ++i) free_ast(stmts[i]); | |
if (stmt_count > 0) free(stmts); | |
return (struct err_ast){ NULL, "expected closing bracket", in }; | |
} | |
} else safe_eat_token(&in); | |
safe_eat_token(&in); | |
struct err_ast r = { &ast_blk_new(stmt_count, stmts, &in->where)->base, NULL, in }; | |
if (stmt_count > 0) free(stmts); | |
return r; | |
} | |
struct err_ast parse_array(struct parser *p, const struct token *in) { | |
if (in->type != '[') return (struct err_ast){ NULL, "expected '['", in }; | |
size_t elem_count = 0; | |
struct ast_base **elems = NULL; | |
if ((in + 1)->type != ']') { | |
do { | |
safe_eat_token(&in); | |
struct err_ast r = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r)) { | |
for (size_t i = 0; i < elem_count; ++i) free_ast(elems[i]); | |
if (elem_count > 0) free(elems); | |
return r; | |
} | |
in = r.rest; | |
elem_count += 1; | |
if (elem_count == 1) elems = malloc(sizeof(struct ast_base*)); | |
else elems = realloc(elems, sizeof(struct ast_base*) * elem_count); | |
elems[elem_count - 1] = r.ast; | |
} while (in->type == ','); | |
if (in->type != ']') { | |
for (size_t i = 0; i < elem_count; ++i) free_ast(elems[i]); | |
if (elem_count > 0) free(elems); | |
return (struct err_ast){ NULL, "expected closing ']'", in }; | |
} | |
} else safe_eat_token(&in); | |
safe_eat_token(&in); | |
struct err_ast r = { &ast_arr_new(elem_count, elems, &in->where)->base, NULL, in }; | |
if (elem_count > 0) free(elems); | |
return r; | |
} | |
struct err_ast parse_atom(struct parser *p, const struct token *in) { | |
if (in->type == '!' || in->type == '-' || in->type == '~') { | |
enum ast_unr_type t | |
= in->type == '!' ? AST_UNR_NOT | |
: in->type == '-' ? AST_UNR_NEG | |
: in->type == '~' ? AST_UNR_BNEG : 0; | |
safe_eat_token(&in); | |
struct err_ast r = parse_atom(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
return (struct err_ast){ &ast_unr_new(t, r.ast, &in->where)->base, NULL, r.rest }; | |
} | |
struct err_ast r; | |
r = parse_var(p, in); | |
if (ERR_AST_IS_AST(r)) return r; | |
r = parse_int(p, in); | |
if (ERR_AST_IS_AST(r)) return r; | |
r = parse_str(p, in); | |
if (ERR_AST_IS_AST(r)) return r; | |
if (in->type == '{') { return parse_block(p, in); } | |
if (in->type == '[') { return parse_array(p, in); } | |
if (in->type == '(') { | |
safe_eat_token(&in); | |
r = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
in = r.rest; | |
if (in->type != ')') { | |
free_ast(r.ast); | |
return (struct err_ast){ NULL, "expected closing parenthesis (subexpr)", in }; | |
} | |
safe_eat_token(&in); | |
return (struct err_ast){ r.ast, NULL, in }; | |
} | |
return (struct err_ast){ NULL, "expected atom", in }; | |
} | |
struct err_ast parse_fun(struct parser *p, const struct token *in, int *body_error) { | |
*body_error = 0; | |
if (in->type != '(' && in->type != TOKEN_VAR) | |
return (struct err_ast){ NULL, "expected function arguments", in }; | |
uint32_t arg_count = 0, min_arg_count = 0; | |
int optional = 0; | |
char **args = NULL; | |
if (in->type != TOKEN_VAR) { // not one argument | |
if ((in + 1)->type != ')') { | |
int first = 1; | |
do { | |
safe_eat_token(&in); | |
if (in->type == '?' && !optional) { | |
optional = 1; | |
safe_eat_token(&in); | |
} | |
struct err_ast r2 = parse_var(p, in); | |
if (ERR_AST_IS_ERR(r2)) { | |
for (size_t i = 0; i < arg_count; ++i) free(args[i]); | |
if (arg_count > 0) free(args); | |
// *body_error = !first; | |
return (struct err_ast){ NULL, "expected argument name", in }; | |
} | |
in = r2.rest; | |
arg_count += 1; | |
if (!optional) min_arg_count += 1; | |
if (arg_count == 1) args = malloc(sizeof(char*)); | |
else args = realloc(args, sizeof(char*) * arg_count); | |
char *argname = ((struct ast_var*)r2.ast)->name; | |
args[arg_count - 1] = malloc(strlen(argname) + 1); | |
memcpy(args[arg_count - 1], argname, strlen(argname) + 1); | |
free_ast(r2.ast); // we don't need the node anymore. find a better way of doing this? | |
first = 0; | |
} while (in->type == ','); | |
if (in->type != ')') { | |
for (size_t i = 0; i < arg_count; ++i) free(args[i]); | |
if (arg_count > 0) free(args); | |
return (struct err_ast){ NULL, "expected closing parenthesis", in }; | |
} | |
} else safe_eat_token(&in); // eat the opening paren | |
} else { | |
arg_count = 1; | |
args = calloc(1, sizeof(char*)); | |
size_t arglen = strlen(TOKEN_VALUE(p->lex, *in)) + 1; | |
args[0] = malloc(arglen); | |
memcpy(args[0], TOKEN_VALUE(p->lex, *in), arglen); | |
} | |
safe_eat_token(&in); | |
if (in->type != TOKEN_ARR) { // TODO: error arrows? | |
for (size_t i = 0; i < arg_count; ++i) free(args[i]); | |
if (arg_count > 0) free(args); | |
return (struct err_ast){ NULL, "expected arrow", in }; | |
} | |
safe_eat_token(&in); | |
struct err_ast r = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r)) { | |
for (size_t i = 0; i < arg_count; ++i) free(args[i]); | |
if (arg_count > 0) free(args); | |
*body_error = 1; | |
return r; | |
} | |
in = r.rest; | |
r = (struct err_ast){ &ast_fun_new(r.ast, arg_count, min_arg_count, args, &in->where)->base, NULL, in }; | |
for (size_t i = 0; i < arg_count; ++i) free(args[i]); | |
if (arg_count > 0) free(args); | |
return r; | |
} | |
struct err_ast parse_inv(struct parser *p, const struct token *in) { | |
struct err_ast r; | |
r = parse_atom(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
in = r.rest; | |
while (in->type == '(') { | |
size_t arg_count = 0; | |
struct ast_base **args = NULL; | |
if ((in + 1)->type != ')') { | |
struct err_ast r_arg; | |
do { | |
safe_eat_token(&in); | |
r_arg = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r_arg)) { | |
for (size_t i = 0; i < arg_count; ++i) free_ast(args[i]); | |
if (arg_count > 0) free(args); | |
free_ast(r.ast); | |
return r_arg; | |
} | |
in = r_arg.rest; | |
arg_count += 1; | |
if (arg_count == 1) args = malloc(sizeof(struct ast_base*)); | |
else args = realloc(args, sizeof(struct ast_base*) * arg_count); | |
args[arg_count - 1] = r_arg.ast; | |
} while (in->type == ','); | |
if (in->type != ')') { | |
for (size_t i = 0; i < arg_count; ++i) free_ast(args[i]); | |
if (arg_count > 0) free(args); | |
free_ast(r.ast); | |
return (struct err_ast){ NULL, "expected closing parenthesis (inv)", in }; | |
} | |
} else safe_eat_token(&in); | |
safe_eat_token(&in); | |
int had_block = 0; | |
if (in->type == '(' || in->type == TOKEN_VAR) { | |
int body_error; | |
struct err_ast r_fun = parse_fun(p, in, &body_error); | |
if (ERR_AST_IS_AST(r_fun)) { | |
in = r_fun.rest; | |
arg_count += 1; | |
if (arg_count == 1) args = malloc(sizeof(struct ast_base*)); | |
else args = realloc(args, sizeof(struct ast_base*) * arg_count); | |
args[arg_count - 1] = r_fun.ast; | |
had_block = 1; | |
} else if (ERR_AST_IS_ERR(r_fun) && body_error) { | |
return r; | |
} | |
} | |
r = (struct err_ast){ &ast_inv_new(r.ast, arg_count, args, &in->where)->base, NULL, in }; | |
if (arg_count > 0) free(args); | |
if (had_block) break; | |
} | |
return r; | |
} | |
struct err_ast parse_bandbor(struct parser *p, const struct token *in) { | |
struct err_ast r; | |
r = parse_inv(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
in = r.rest; | |
while (in->type == '&' || in->type == '|') { | |
enum ast_bin_type t = in->type == '&' ? AST_BIN_BAND : AST_BIN_BOR; | |
safe_eat_token(&in); | |
struct err_ast r2 = parse_inv(p, in); | |
if (ERR_AST_IS_ERR(r2)) { | |
free_ast(r.ast); | |
return r2; | |
} | |
in = r2.rest; | |
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in }; | |
} | |
return r; | |
} | |
struct err_ast parse_muldiv(struct parser *p, const struct token *in) { | |
struct err_ast r; | |
r = parse_bandbor(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
in = r.rest; | |
while (in->type == '*' || in->type == '/') { | |
enum ast_bin_type t = in->type == '*' ? AST_BIN_MUL : AST_BIN_DIV; | |
safe_eat_token(&in); | |
struct err_ast r2 = parse_bandbor(p, in); | |
if (ERR_AST_IS_ERR(r2)) { | |
free_ast(r.ast); | |
return r2; | |
} | |
in = r2.rest; | |
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in }; | |
} | |
return r; | |
} | |
struct err_ast parse_addsub(struct parser *p, const struct token *in) { | |
struct err_ast r; | |
r = parse_muldiv(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
in = r.rest; | |
while (in->type == '+' || in->type == '-') { | |
enum ast_bin_type t = in->type == '+' ? AST_BIN_ADD : AST_BIN_SUB; | |
safe_eat_token(&in); | |
struct err_ast r2 = parse_muldiv(p, in); | |
if (ERR_AST_IS_ERR(r2)) { | |
free_ast(r.ast); | |
return r2; | |
} | |
in = r2.rest; | |
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in }; | |
} | |
return r; | |
} | |
struct err_ast parse_cmp(struct parser *p, const struct token *in) { | |
struct err_ast r; | |
r = parse_addsub(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
in = r.rest; | |
while (in->type == '<' || in->type == '>' || in->type == TOKEN_GEQ || | |
in->type == TOKEN_LEQ || in->type == TOKEN_EQL || in->type == TOKEN_NEQ) { | |
enum ast_bin_type t; | |
switch (in->type) { | |
case '<': t = AST_BIN_LTH; break; | |
case '>': t = AST_BIN_GTH; break; | |
case TOKEN_GEQ: t = AST_BIN_GEQ; break; | |
case TOKEN_LEQ: t = AST_BIN_LEQ; break; | |
case TOKEN_EQL: t = AST_BIN_EQL; break; | |
case TOKEN_NEQ: t = AST_BIN_NEQ; break; | |
default: UNREACHABLE(); | |
} | |
safe_eat_token(&in); | |
struct err_ast r2 = parse_addsub(p, in); | |
if (ERR_AST_IS_ERR(r2)) { | |
free_ast(r.ast); | |
return r2; | |
} | |
in = r2.rest; | |
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in }; | |
} | |
return r; | |
} | |
struct err_ast parse_andor(struct parser *p, const struct token *in) { | |
struct err_ast r; | |
r = parse_cmp(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
in = r.rest; | |
while (in->type == TOKEN_AND || in->type == TOKEN_OR) { | |
enum ast_bin_type t = in->type == '+' ? AST_BIN_ADD : AST_BIN_SUB; | |
safe_eat_token(&in); | |
struct err_ast r2 = parse_cmp(p, in); | |
if (ERR_AST_IS_ERR(r2)) { | |
free_ast(r.ast); | |
return r2; | |
} | |
in = r2.rest; | |
r = (struct err_ast){ &ast_bin_new(t, r.ast, r2.ast, &in->where)->base, NULL, in }; | |
} | |
return r; | |
} | |
struct err_ast parse_primary(struct parser *p, const struct token *in) { | |
return parse_andor(p, in); | |
} | |
struct err_ast parse_stmt(struct parser *p, const struct token *in) { | |
if (in->type == TOKEN_ARR || in->type == TOKEN_EAR | |
|| (in->type == TOKEN_VAR && strcmp(TOKEN_VALUE(p->lex, *in), "return") == 0)) { | |
int error = in->type == TOKEN_EAR; | |
safe_eat_token(&in); | |
struct err_ast r = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
return (struct err_ast){ &ast_ret_new(r.ast, error, &in->where)->base, NULL, r.rest }; | |
} | |
if (in->type == TOKEN_VAR && strcmp(TOKEN_VALUE(p->lex, *in), "if") == 0) { | |
safe_eat_token(&in); | |
struct err_ast r_cond = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r_cond)) return r_cond; | |
in = r_cond.rest; | |
struct err_ast r_then = parse_block(p, in); | |
if (ERR_AST_IS_ERR(r_then)) { | |
free_ast(r_cond.ast); | |
return r_then; | |
} | |
in = r_then.rest; | |
struct ast_base *other = NULL; | |
if (in->type == TOKEN_VAR && strcmp(TOKEN_VALUE(p->lex, *in), "else") == 0) { | |
safe_eat_token(&in); | |
struct err_ast r_other = parse_block(p, in); | |
if (ERR_AST_IS_ERR(r_other)) { | |
free_ast(r_cond.ast); | |
free_ast(r_then.ast); | |
return r_other; | |
} | |
in = r_other.rest; | |
other = r_other.ast; | |
} | |
return (struct err_ast){ &ast_trn_new(r_cond.ast, r_then.ast, other, &in->where)->base, NULL, in }; | |
} | |
return parse_expr(p, in); | |
} | |
struct err_ast parse_expr(struct parser *p, const struct token *in) { | |
if (in->type == '(' || in->type == TOKEN_VAR) { | |
int body_error; | |
struct err_ast r = parse_fun(p, in, &body_error); | |
if (ERR_AST_IS_AST(r)) return r; | |
if (ERR_AST_IS_ERR(r) && body_error) { | |
return r; | |
} | |
} | |
if ( | |
in->type == TOKEN_VAR && ( | |
(in + 1)->type == '=' || | |
(in + 1)->type == ':' || | |
((in + 1)->type >= TOKEN_OPEQ_FIRST_ && | |
(in + 1)->type <= TOKEN_OPEQ_LAST_)) | |
) { | |
struct err_ast r_name = parse_var(p, in); | |
if (ERR_AST_IS_ERR(r_name)) return r_name; | |
const char *name = ((struct ast_var*)r_name.ast)->name; | |
in = r_name.rest; | |
int is_let = (in->type == ':'); | |
if (is_let) safe_eat_token(&in); | |
if (in->type != '=' && !is_let) { | |
enum ast_bin_type t = token_type_opeq_to_ast_bin_type(in->type); | |
safe_eat_token(&in); | |
struct err_ast r_value = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r_value)) { | |
free_ast(r_name.ast); | |
return r_value; | |
} | |
return (struct err_ast){ | |
&ast_set_new(name, &ast_bin_new(t, r_name.ast, r_value.ast, &in->where)->base, &in->where)->base, | |
NULL, r_value.rest | |
}; | |
} else { | |
if (in->type != '=') { | |
return r_name; // not assignment, just a var (TODO: verify that this works) | |
} | |
safe_eat_token(&in); | |
} | |
struct err_ast r_value = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r_value)) { | |
free_ast(r_name.ast); | |
return r_value; | |
} | |
struct err_ast r; | |
if (is_let) { | |
r = (struct err_ast){ &ast_let_new(name, r_value.ast, &in->where)->base, NULL, r_value.rest }; | |
} else { | |
r = (struct err_ast){ &ast_set_new(name, r_value.ast, &in->where)->base, NULL, r_value.rest }; | |
} | |
free_ast(r_name.ast); | |
return r; | |
} | |
struct err_ast r = parse_primary(p, in); | |
if (ERR_AST_IS_ERR(r)) return r; | |
in = r.rest; | |
if (in->type == '?') { | |
safe_eat_token(&in); | |
struct err_ast r2 = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r2)) { | |
free_ast(r.ast); | |
return r2; | |
} | |
in = r2.rest; | |
struct err_ast r3 = { /* important! */ NULL }; | |
if (in->type == ':') { | |
safe_eat_token(&in); | |
r3 = parse_expr(p, in); | |
if (ERR_AST_IS_ERR(r3)) { | |
free_ast(r.ast); | |
free_ast(r2.ast); | |
return r3; | |
} | |
in = r3.rest; | |
} | |
return (struct err_ast){ &ast_trn_new(r.ast, r2.ast, r3.ast, &in->where)->base, NULL, in }; | |
} | |
return r; | |
} | |
enum value_type { VALUE_NIL, VALUE_INT, VALUE_EXT, VALUE_DAT, VALUE_OBJ, VALUE_RET }; | |
struct gc_obj_ctx; | |
struct gc { | |
size_t obj_count; | |
struct gc_obj_base **objs; | |
}; | |
enum gc_obj_type { GC_OBJ_STR, GC_OBJ_ARR, GC_OBJ_MAP, GC_OBJ_FUN, GC_OBJ_CTX }; | |
struct value; | |
struct gc_obj_base { enum gc_obj_type type; int marked; }; | |
struct eval_context { | |
struct gc *gc; | |
struct gc_obj_ctx *this; | |
size_t stack_size, stack_capacity; | |
struct value *stack; | |
size_t callstack_size, callstack_capacity; | |
struct callstack_entry *callstack; | |
}; | |
struct ret_value; | |
struct value_ext { | |
struct value (*func)(struct eval_context *ctx, size_t arg_count, struct value *args); | |
const char *name; | |
struct where where; | |
}; | |
struct value { | |
enum value_type type; | |
union { | |
intmax_t int_value; | |
struct gc_obj_base *obj_value; | |
void *dat_value; | |
struct ret_value *ret_value; | |
struct value_ext ext_value; | |
}; | |
}; | |
struct callstack_entry { | |
struct where call_location; | |
struct value callee; | |
}; | |
struct ret_value { | |
int error; | |
struct value value; | |
}; | |
struct gc_obj_str { struct gc_obj_base base; char *value; }; | |
struct gc_obj_arr { struct gc_obj_base base; size_t count; struct value *items; }; | |
struct gc_obj_fun { | |
struct gc_obj_base base; | |
struct gc_obj_ctx *ctx; | |
uint32_t arg_count, min_arg_count; | |
char **args; | |
struct ast_base *code; | |
struct where where; | |
char *name; | |
}; | |
struct gc_obj_ctx { | |
struct gc_obj_base base; | |
struct gc_obj_ctx *parent; | |
size_t var_count; | |
struct { | |
char *name; | |
struct value value; | |
} *vars; | |
}; | |
void gc_free(struct gc *gc) { | |
if (gc->obj_count > 0) free(gc->objs); | |
} | |
void gc_add(struct gc *gc, struct gc_obj_base *o) { | |
gc->obj_count += 1; | |
if (gc->obj_count == 1) gc->objs = malloc(sizeof(struct gc_obj_base*)); | |
else gc->objs = realloc(gc->objs, sizeof(struct gc_obj_base*) * gc->obj_count); | |
gc->objs[gc->obj_count - 1] = o; | |
} | |
struct gc_obj_str *gc_obj_str_new(struct gc *gc, char *value) { | |
struct gc_obj_str *o = MEMALLOC(struct gc_obj_str); | |
gc_add(gc, &o->base); | |
o->base.type = GC_OBJ_STR; | |
o->value = malloc(strlen(value) + 1); | |
memcpy(o->value, value, strlen(value) + 1); | |
return o; | |
} | |
struct gc_obj_str *gc_obj_str_new_moved(struct gc *gc, char *value) { | |
struct gc_obj_str *o = MEMALLOC(struct gc_obj_str); | |
gc_add(gc, &o->base); | |
o->base.type = GC_OBJ_STR; | |
o->value = value; | |
return o; | |
} | |
struct gc_obj_arr *gc_obj_arr_new_empty(struct gc *gc) { | |
struct gc_obj_arr *o = MEMALLOC(struct gc_obj_arr); | |
gc_add(gc, &o->base); | |
o->base.type = GC_OBJ_ARR; | |
o->count = 0; | |
o->items = NULL; | |
return o; | |
} | |
struct gc_obj_fun *gc_obj_fun_new( | |
struct gc *gc, | |
uint32_t arg_count, | |
uint32_t min_arg_count, | |
char **args, | |
struct gc_obj_ctx *ctx, | |
struct ast_base *code, | |
const char *name | |
) { | |
struct gc_obj_fun *o = MEMALLOC(struct gc_obj_fun); | |
gc_add(gc, &o->base); | |
o->base.type = GC_OBJ_FUN; | |
o->ctx = ctx; | |
o->code = copy_ast(code); | |
o->arg_count = arg_count; | |
o->min_arg_count = min_arg_count; | |
if (name) { | |
o->name = malloc(strlen(name) + 1); | |
memcpy(o->name, name, strlen(name) + 1); | |
} | |
else o->name = NULL; | |
if (o->arg_count > 0) { | |
o->args = calloc(o->arg_count, sizeof(char*)); | |
for (size_t i = 0; i < o->arg_count; ++i) { | |
o->args[i] = malloc(strlen(args[i]) + 1); | |
memcpy(o->args[i], args[i], strlen(args[i]) + 1); | |
} | |
} else { | |
o->args = NULL; | |
} | |
return o; | |
} | |
struct gc_obj_ctx *gc_obj_ctx_new(struct gc *gc, struct gc_obj_ctx *parent) { | |
struct gc_obj_ctx *o = MEMALLOC(struct gc_obj_ctx); | |
gc_add(gc, &o->base); | |
o->base.type = GC_OBJ_CTX; | |
o->parent = parent; | |
o->var_count = 0; | |
o->vars = NULL; | |
return o; | |
} | |
struct value gc_obj_ctx_get_var(struct gc_obj_ctx *ctx, const char *name) { | |
for (size_t i = 0; i < ctx->var_count; ++i) { | |
if (strcmp(ctx->vars[i].name, name) == 0) return ctx->vars[i].value; | |
} | |
if (ctx->parent != NULL) return gc_obj_ctx_get_var(ctx->parent, name); | |
fprintf(stderr, "no such variable: '%s'!\n", name); | |
exit(1); | |
} | |
void gc_obj_ctx_set_var(struct gc_obj_ctx *ctx, const char *name, struct value value) { | |
for (size_t i = 0; i < ctx->var_count; ++i) { | |
if (strcmp(ctx->vars[i].name, name) == 0) { | |
ctx->vars[i].value = value; | |
return; | |
} | |
} | |
if (ctx->parent != NULL) { | |
gc_obj_ctx_set_var(ctx->parent, name, value); | |
return; | |
} | |
fprintf(stderr, "no such variable: '%s'! (set)\n", name); | |
exit(1); | |
} | |
void gc_obj_ctx_set_new_var(struct gc_obj_ctx *ctx, const char *name, struct value value) { | |
for (size_t i = 0; i < ctx->var_count; ++i) { | |
if (strcmp(ctx->vars[i].name, name) == 0) | |
ctx->vars[i].value = value; | |
} | |
ctx->var_count += 1; | |
if (ctx->var_count == 1) { ctx->vars = calloc(1, sizeof(*ctx->vars)); } | |
else { ctx->vars = realloc(ctx->vars, sizeof(*ctx->vars) * ctx->var_count); } | |
ctx->vars[ctx->var_count - 1].name = malloc(strlen(name) + 1); | |
memcpy(ctx->vars[ctx->var_count - 1].name, name, strlen(name) + 1); | |
ctx->vars[ctx->var_count - 1].value = value; | |
} | |
void free_gc_obj(struct gc_obj_base *o) { | |
if (o == NULL) { fprintf(stderr, "free_gc_obj(NULL)!\n"); return; } | |
switch (o->type) { | |
case GC_OBJ_STR: { | |
free(((struct gc_obj_str*)o)->value); | |
} break; | |
case GC_OBJ_ARR: { | |
struct gc_obj_arr *arr = (struct gc_obj_arr*)o; | |
for (size_t i = 0; i < arr->count; ++i) | |
if (arr->items[i].type == VALUE_OBJ) | |
free_gc_obj(arr->items[i].obj_value); | |
if (arr->count > 0) free(arr->items); | |
} break; | |
case GC_OBJ_FUN: { | |
struct gc_obj_fun *fun = (struct gc_obj_fun*)o; | |
for (size_t i = 0; i < fun->arg_count; ++i) | |
free(fun->args[i]); | |
if (fun->arg_count > 0) free(fun->args); | |
free_ast(fun->code); | |
if (fun->name) free(fun->name); | |
} break; | |
case GC_OBJ_CTX: { | |
struct gc_obj_ctx *ctx = (struct gc_obj_ctx*)o; | |
for (size_t i = 0; i < ctx->var_count; ++i) | |
free(ctx->vars[i].name); | |
if (ctx->var_count > 0) free(ctx->vars); | |
} break; | |
case GC_OBJ_MAP: | |
default: | |
fprintf(stderr, "free: unknown gc object type: %d!\n", o->type); | |
break; | |
} | |
free(o); | |
} | |
size_t gc_obj_arr_insert(struct gc_obj_arr *arr, struct value item, size_t idx) { | |
ASSERT(idx <= arr->count); | |
arr->count += 1; | |
if (arr->count == 1) arr->items = malloc(sizeof(struct value)); | |
else arr->items = realloc(arr->items, sizeof(struct value) * arr->count); | |
memmove(arr->items + idx + 1, arr->items + idx, (arr->count - 1 - idx) * sizeof(*arr->items)); | |
arr->items[idx] = item; | |
return arr->count; | |
} | |
size_t gc_obj_arr_delete(struct gc_obj_arr *arr, struct value item, size_t idx) { | |
ASSERT(idx < arr->count); | |
memmove(arr->items + idx, arr->items + idx + 1, (arr->count - 1 - idx) * sizeof(*arr->items)); | |
arr->count -= 1; | |
if (arr->count == 0) free(arr->items); | |
else arr->items = realloc(arr->items, sizeof(struct value) * arr->count); | |
return arr->count; | |
} | |
size_t gc_obj_arr_push(struct gc_obj_arr *arr, struct value item) { | |
return gc_obj_arr_insert(arr, item, arr->count); | |
} | |
size_t gc_obj_arr_pop(struct gc_obj_arr *arr, struct value item) { | |
ASSERT(arr->count > 0); | |
return gc_obj_arr_insert(arr, item, arr->count); | |
} | |
struct outpipe { | |
void (*putc)(struct outpipe *self, char c); | |
void (*puts)(struct outpipe *self, const char *s); | |
}; | |
struct file_outpipe { | |
struct outpipe base; | |
FILE *fout; | |
}; | |
void file_outpipe_putc(struct outpipe *self, char c) { | |
fputc(c, ((struct file_outpipe*)self)->fout); | |
} | |
void file_outpipe_puts(struct outpipe *self, const char *s) { | |
fputs(s, ((struct file_outpipe*)self)->fout); | |
} | |
struct file_outpipe file_outpipe_of(FILE *fout) { | |
return (struct file_outpipe){ | |
.base.putc = &file_outpipe_putc, | |
.base.puts = &file_outpipe_puts, | |
.fout = fout | |
}; | |
} | |
struct str_outpipe { | |
struct outpipe base; | |
size_t size, alloced; | |
char *str; | |
}; | |
void str_outpipe_putc(struct outpipe *self, char c); | |
void str_outpipe_puts(struct outpipe *self, const char *s); | |
void str_outpipe_grow(struct str_outpipe *p, size_t size); | |
void str_outpipe_init(struct str_outpipe *p) { | |
p->base.putc = &str_outpipe_putc; | |
p->base.puts = &str_outpipe_puts; | |
p->alloced = 0; | |
p->size = 0; | |
p->str = NULL; | |
str_outpipe_grow(p, 1); | |
p->str[0] = '\0'; | |
} | |
void str_outpipe_free(struct str_outpipe *p) { | |
if (p->alloced > 0) free(p->str); | |
} | |
void str_outpipe_grow(struct str_outpipe *p, size_t size) { | |
p->size = size; | |
if (p->size >= p->alloced) { | |
p->alloced += 64; | |
if (p->alloced == 64) p->str = malloc(p->alloced); | |
else p->str = realloc(p->str, p->alloced); | |
} | |
} | |
void str_outpipe_putc(struct outpipe *self, char c) { | |
struct str_outpipe *p = (struct str_outpipe*)self; | |
str_outpipe_grow(p, p->size + 1); // p->size >= 2 now. | |
p->str[p->size - 2] = c; // this was '\0'. | |
p->str[p->size - 1] = '\0'; | |
} | |
void str_outpipe_puts(struct outpipe *self, const char *s) { | |
struct str_outpipe *p = (struct str_outpipe*)self; | |
size_t old_size = p->size; // >= 1. | |
str_outpipe_grow(p, p->size + strlen(s)); | |
memcpy(p->str + old_size - 1, s, strlen(s)); | |
p->str[p->size - 1] = '\0'; | |
} | |
__attribute__((format(printf, 2, 3))) | |
void printf_topipe(struct outpipe *p, const char *fmt, ...) { | |
va_list va; | |
va_start(va, fmt); | |
int sz = vsnprintf(NULL, 0, fmt, va); | |
va_end(va); | |
char buf[sz + 1]; | |
va_start(va, fmt); | |
vsnprintf(buf, sizeof buf, fmt, va); | |
va_end(va); | |
p->puts(p, buf); | |
} | |
enum print_flags { | |
PRINT_FLAG_COLOR = 1 << 0, | |
PRINT_FLAG_STRUCT = 1 << 1, | |
}; | |
#define LIT_COLOR(S, F) ((F) & PRINT_FLAG_COLOR ? "\033[33m" S "\033[m" : S) | |
#define KWD_COLOR(S, F) ((F) & PRINT_FLAG_COLOR ? "\033[35m" S "\033[m" : S) | |
void print_gc_obj(struct gc_obj_base *o, struct outpipe *fout, enum print_flags flags); | |
void print_value(struct value v, struct outpipe *fout, enum print_flags flags) { | |
switch (v.type) { | |
case VALUE_NIL: printf_topipe(fout, LIT_COLOR("nil", flags)); break; | |
case VALUE_INT: printf_topipe(fout, LIT_COLOR("%zd", flags), v.int_value); break; | |
case VALUE_EXT: printf_topipe(fout, "<%s %s>", KWD_COLOR("ext", flags), v.ext_value.name); break; | |
case VALUE_DAT: printf_topipe(fout, "<%s %p>", KWD_COLOR("dat", flags), v.dat_value); break; | |
case VALUE_OBJ: print_gc_obj(v.obj_value, fout, flags); break; | |
case VALUE_RET: | |
printf_topipe(fout, "<%s %s", KWD_COLOR("ret", flags), v.ret_value->error ? KWD_COLOR("error ", flags) : ""); | |
print_value(v.ret_value->value, fout, flags); | |
fout->putc(fout, '>'); | |
break; | |
default: fprintf(stderr, "print: unknown value type: %d!\n", v.type); return; | |
} | |
} | |
int gc_obj_equal(struct gc_obj_base *o1, struct gc_obj_base *o2); | |
int value_equal(struct value a, struct value b) { | |
if (a.type != b.type) return 0; | |
switch (a.type) { | |
case VALUE_NIL: return 1; | |
case VALUE_INT: return a.int_value == b.int_value; | |
case VALUE_EXT: return a.ext_value.func == b.ext_value.func; | |
case VALUE_DAT: return a.dat_value == b.dat_value; | |
case VALUE_OBJ: return gc_obj_equal(a.obj_value, b.obj_value); | |
case VALUE_RET: return 0; | |
default: fprintf(stderr, "equal: unknown value type: %d!\n", a.type); break; | |
} | |
return 0; | |
} | |
int gc_obj_equal(struct gc_obj_base *o1, struct gc_obj_base *o2) { | |
if (o1 == o2) return 1; | |
if (o1->type != o2->type) return 0; | |
switch (o1->type) { | |
case GC_OBJ_STR: | |
return strcmp(((struct gc_obj_str*)o1)->value, ((struct gc_obj_str*)o2)->value) == 0; | |
case GC_OBJ_FUN: | |
return 0; | |
case GC_OBJ_ARR: | |
if (((struct gc_obj_arr*)o1)->count != ((struct gc_obj_arr*)o2)->count) | |
return 0; | |
for (size_t i = 0; i < ((struct gc_obj_arr*)o1)->count; ++i) { | |
if (!value_equal(((struct gc_obj_arr*)o1)->items[i], ((struct gc_obj_arr*)o2)->items[i])) | |
return 0; | |
} | |
return 0; | |
default: fprintf(stderr, "equal: unknown gc object type: %d!\n", o1->type); | |
} | |
return 0; | |
} | |
static void print_escaped_string_(const char *str, struct outpipe *fout, int color) { | |
for (; *str; ++str) { | |
if (!isprint(*str)) { | |
if (color) fout->puts(fout, "\033[34m"); | |
fout->putc(fout, '\\'); | |
switch (*str) { | |
case '\n': fout->putc(fout, 'n'); break; | |
case '\t': fout->putc(fout, 't'); break; | |
case '\r': fout->putc(fout, 'r'); break; | |
case '\0': fout->putc(fout, '0'); break; | |
case '\a': fout->putc(fout, 'a'); break; | |
case '\"': fout->putc(fout, '"'); break; | |
case '\'': fout->putc(fout, '\''); break; | |
case '\\': fout->putc(fout, '\\'); break; | |
default: printf_topipe(fout, "%02x", *str); break; | |
} | |
if (color) fout->puts(fout, "\033[32m"); | |
} else fout->putc(fout, *str); | |
} | |
} | |
void print_gc_obj(struct gc_obj_base *o, struct outpipe *fout, enum print_flags flags) { | |
switch (o->type) { | |
case GC_OBJ_STR: | |
if (flags & PRINT_FLAG_STRUCT) { | |
fout->puts(fout, flags & PRINT_FLAG_COLOR ? "\033[32m\"" : "\""); | |
print_escaped_string_(((struct gc_obj_str*)o)->value, fout, flags & PRINT_FLAG_COLOR); | |
fout->puts(fout, flags & PRINT_FLAG_COLOR ? "\033[m\"" : "\""); | |
} else { | |
printf_topipe(fout, "%s", ((struct gc_obj_str*)o)->value); | |
} | |
break; | |
case GC_OBJ_FUN: | |
if (((struct gc_obj_fun*)o)->name != NULL) | |
printf_topipe(fout, "<%s %s>", KWD_COLOR("fun", flags), ((struct gc_obj_fun*)o)->name); | |
else | |
printf_topipe(fout, "<%s %p>", KWD_COLOR("fun", flags), o); | |
break; | |
case GC_OBJ_CTX: printf_topipe(fout, "<%s %p>", KWD_COLOR("ctx", flags), o); break; | |
case GC_OBJ_ARR: | |
printf_topipe(fout, LIT_COLOR("#%zu", flags), ((struct gc_obj_arr*)o)->count); | |
fout->putc(fout, '['); | |
for (size_t i = 0; i < ((struct gc_obj_arr*)o)->count; ++i) { | |
print_value(((struct gc_obj_arr*)o)->items[i], fout, flags); | |
if (i != ((struct gc_obj_arr*)o)->count - 1) fout->puts(fout, ", "); | |
} | |
fout->putc(fout, ']'); | |
break; | |
default: fprintf(stderr, "print: unknown gc object type: %d!\n", o->type); return; | |
} | |
} | |
void gc_mark(struct gc *gc, struct gc_obj_base *o) { | |
if (o->marked) return; | |
o->marked = 1; | |
switch (o->type) { | |
case GC_OBJ_STR: break; | |
case GC_OBJ_FUN: | |
gc_mark(gc, &((struct gc_obj_fun*)o)->ctx->base); | |
break; | |
case GC_OBJ_ARR: | |
for (size_t i = 0; i < ((struct gc_obj_arr*)o)->count; ++i) | |
if (((struct gc_obj_arr*)o)->items[i].type == VALUE_OBJ) | |
gc_mark(gc, ((struct gc_obj_arr*)o)->items[i].obj_value); | |
break; | |
case GC_OBJ_CTX: | |
if (((struct gc_obj_ctx*)o)->parent) gc_mark(gc, &((struct gc_obj_ctx*)o)->parent->base); | |
for (size_t i = 0; i < ((struct gc_obj_ctx*)o)->var_count; ++i) | |
if (((struct gc_obj_ctx*)o)->vars[i].value.type == VALUE_OBJ) | |
gc_mark(gc, ((struct gc_obj_ctx*)o)->vars[i].value.obj_value); | |
break; | |
case GC_OBJ_MAP: | |
default: | |
fprintf(stderr, "gc_mark: unknown gc object type: %d!\n", o->type); | |
break; | |
} | |
} | |
void gc_sweep(struct gc *gc) { | |
size_t last = 0; | |
size_t sweeped = 0; | |
for (size_t i = 0; i < gc->obj_count; ++i) { | |
if (gc->objs[i] && !gc->objs[i]->marked) { | |
free_gc_obj(gc->objs[i]); | |
gc->objs[i] = NULL; | |
sweeped += 1; | |
} else { | |
gc->objs[i]->marked = 0; // unmark | |
memmove(&gc->objs[last], &gc->objs[i], sizeof(struct gc_obj_base*)); | |
++last; | |
} | |
} | |
gc->obj_count -= sweeped; | |
if (SHOULD_PRINT_GC_SWEEPS) | |
fprintf(stderr, "\n[GC: sweeped %zu objects!]\n", sweeped); | |
} | |
void gc_mark_value_(struct gc *gc, struct value val) { | |
if (val.type == VALUE_OBJ) | |
gc_mark(gc, val.obj_value); | |
if (val.type == VALUE_RET && val.ret_value->value.type == VALUE_OBJ) | |
gc_mark(gc, val.ret_value->value.obj_value); | |
} | |
void gc_marksweep(struct gc *gc, struct gc_obj_ctx *ctx, struct eval_context *eval) { | |
gc_mark(gc, &ctx->base); | |
for (size_t i = 0; i < eval->stack_size; ++i) | |
gc_mark_value_(gc, eval->stack[i]); | |
for (size_t i = 0; i < eval->callstack_size; ++i) | |
gc_mark_value_(gc, eval->callstack[i].callee); | |
gc_sweep(gc); | |
} | |
void gc_sweepall(struct gc *gc) { | |
for (size_t i = 0; i < gc->obj_count; ++i) { | |
free_gc_obj(gc->objs[i]); | |
} | |
gc->obj_count = 0; | |
free(gc->objs); | |
} | |
__attribute__((format(printf, 2, 3))) | |
struct value error_value(struct eval_context *ctx, const char *fmt, ...) { | |
va_list va; | |
va_start(va, fmt); | |
size_t len = vsnprintf(NULL, 0, fmt, va); | |
va_end(va); | |
char buf[len + 1]; | |
va_start(va, fmt); | |
vsnprintf(buf, sizeof buf, fmt, va); | |
va_end(va); | |
struct ret_value *ret = calloc(1, sizeof(*ret)); | |
ret->value = (struct value){ .type = VALUE_OBJ, .obj_value = &gc_obj_str_new(ctx->gc, buf)->base }; | |
ret->error = 1; | |
return (struct value){ .type = VALUE_RET, .ret_value = ret }; | |
} | |
struct value concat_strings(struct gc *gc, struct gc_obj_str *lhs, struct gc_obj_str *rhs) { | |
char *nstr = calloc(1, strlen(lhs->value) + strlen(rhs->value) + 1); | |
memcpy(nstr, lhs->value, strlen(lhs->value)); | |
memcpy(nstr + strlen(lhs->value), rhs->value, strlen(rhs->value)); | |
return (struct value){ .type = VALUE_OBJ, .obj_value = &gc_obj_str_new_moved(gc, nstr)->base }; | |
} | |
static inline void eval_ast(struct eval_context *eval, struct ast_base *n); | |
void eval_ensure_stack_capacity_(struct eval_context *eval, size_t mincap) { | |
if (eval->stack_capacity < mincap) { | |
if (eval->stack_capacity == 0) eval->stack = malloc(sizeof(struct value) * mincap); | |
else eval->stack = realloc(eval->stack, sizeof(struct value) * mincap); | |
eval->stack_capacity = mincap; | |
} | |
} | |
void eval_ensure_callstack_capacity_(struct eval_context *eval, size_t mincap) { | |
if (eval->callstack_capacity < mincap) { | |
if (eval->callstack_capacity == 0) eval->callstack = malloc(sizeof(struct callstack_entry) * mincap); | |
else eval->callstack = realloc(eval->callstack, sizeof(struct callstack_entry) * mincap); | |
eval->callstack_capacity = mincap; | |
} | |
} | |
void eval_free(struct eval_context *eval) { | |
if (eval->stack_capacity > 0) free(eval->stack); | |
} | |
void eval_push(struct eval_context *eval, struct value val) { | |
eval_ensure_stack_capacity_(eval, ++eval->stack_size); | |
eval->stack[eval->stack_size - 1] = val; | |
} | |
struct value eval_pop(struct eval_context *eval) { | |
if (eval->stack_size == 0) { | |
fprintf(stderr, "eval_pop: stack underflow!\n"); | |
FAIL(); | |
} | |
return eval->stack[--eval->stack_size]; | |
} | |
struct callstack_entry *eval_push_call(struct eval_context *eval) { | |
eval_ensure_callstack_capacity_(eval, ++eval->callstack_size); | |
return &eval->callstack[eval->callstack_size - 1]; | |
} | |
void eval_pop_call(struct eval_context *eval) { | |
if (eval->callstack_size == 0) { | |
fprintf(stderr, "eval_pop_call: stack underflow!\n"); | |
FAIL(); | |
} | |
--eval->callstack_size; | |
} | |
static struct value convert_index(struct eval_context *eval, intmax_t index, size_t count) { | |
if (index < 0) index += count; | |
if (UNLIKELY(index >= (intmax_t)count)) | |
return error_value(eval, "array index too big (out of bounds)!"); | |
if (UNLIKELY(index < 0)) | |
return error_value(eval, "array index too small (out of bounds)!"); | |
return (struct value){ .type = VALUE_INT, .int_value = index }; | |
} | |
struct value eval_invoke( | |
struct eval_context *eval, | |
struct value fun, | |
size_t arg_count, | |
struct value *args, | |
struct where *where | |
) { | |
struct callstack_entry *call = eval_push_call(eval); | |
call->call_location = *where; | |
call->callee = fun; | |
if (fun.type == VALUE_EXT) { | |
struct value ret = fun.ext_value.func(eval, arg_count, args); | |
eval_pop_call(eval); | |
return ret; | |
} | |
if (fun.type != VALUE_OBJ) { | |
return error_value(eval, "can only invoke functions, arrays and extern functions!"); | |
} | |
if (fun.obj_value->type == GC_OBJ_ARR) { | |
if (arg_count != 1 && arg_count != 2) { | |
return error_value(eval, "invoking arrays has to pass either 1 or 2 arguments!"); | |
} | |
if (args[0].type != VALUE_INT) { | |
return error_value(eval, "an array index has to be an integer!"); | |
} | |
struct value idx = convert_index(eval, args[0].int_value, ((struct gc_obj_arr*)fun.obj_value)->count); | |
if (idx.type == VALUE_RET) return idx; | |
struct value *item = &((struct gc_obj_arr*)fun.obj_value)->items[idx.int_value]; | |
if (arg_count == 2) *item = args[1]; | |
eval_pop_call(eval); | |
return *item; | |
} | |
if (fun.obj_value->type == GC_OBJ_FUN) { | |
struct gc_obj_fun *fo = (struct gc_obj_fun*)fun.obj_value; | |
if (arg_count < fo->min_arg_count) { | |
return error_value(eval, | |
"function expects at least %u arguments, but %zu provided!", fo->min_arg_count, arg_count); | |
} | |
struct gc_obj_ctx *nctx = gc_obj_ctx_new(eval->gc, fo->ctx); | |
size_t f_min_arg_count = arg_count < fo->arg_count ? arg_count : fo->arg_count; | |
for (size_t i = 0; i < f_min_arg_count; ++i) { | |
gc_obj_ctx_set_new_var(nctx, fo->args[i], args[i]); | |
} | |
if (arg_count < fo->arg_count) { | |
for (size_t i = arg_count; i < fo->arg_count; ++i) { | |
gc_obj_ctx_set_new_var(nctx, fo->args[i], (struct value){ .type = VALUE_NIL }); | |
} | |
} | |
struct gc_obj_ctx *old_this = eval->this; | |
eval->this = nctx; | |
// so that the old this doesn't get sweeped during this call. | |
eval_push(eval, (struct value){ .type = VALUE_OBJ, .obj_value = &old_this->base }); | |
eval_ast(eval, fo->code); | |
gc_marksweep(eval->gc, eval->this, eval); | |
struct value res = eval_pop(eval); | |
eval_pop(eval); // pop old this. | |
eval->this = old_this; | |
if (res.type == VALUE_RET) { | |
if (res.ret_value->error) return res; | |
struct value ret = res.ret_value->value; | |
free(res.ret_value); | |
eval_pop_call(eval); | |
return ret; | |
} else { | |
eval_pop_call(eval); | |
return res; | |
} | |
} | |
// eval_pop_call(eval); | |
fprintf(stderr, "invoke: not implemented\n"); | |
exit(1); | |
} | |
static inline void _eval_ast(struct eval_context *eval, struct ast_base *n); | |
/** the stack is strictly larger by one after calling this. */ | |
static inline void eval_ast(struct eval_context *eval, struct ast_base *n) { | |
size_t before = eval->stack_size; | |
_eval_ast(eval, n); | |
size_t after = eval->stack_size; | |
if ((ptrdiff_t)after - (ptrdiff_t)before != 1) { | |
fprintf(stderr, "eval_ast protocol violation: stack must be strictly larger by 1\n"); | |
fprintf(stderr, " before: %zu\n", before); | |
fprintf(stderr, " after: %zu\n", after); | |
FAIL(); | |
} | |
} | |
static inline void _eval_ast(struct eval_context *eval, struct ast_base *n) { | |
#define RETURN_IF_RET(X) if (eval->stack[eval->stack_size - 1].type == VALUE_RET) { X; return; } | |
switch (n->type) { | |
case AST_INT: | |
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = ((struct ast_int*)n)->value }); | |
break; | |
case AST_VAR: | |
eval_push(eval, gc_obj_ctx_get_var(eval->this, ((struct ast_var*)n)->name)); | |
break; | |
case AST_STR: | |
eval_push(eval, (struct value){ .type = VALUE_OBJ, | |
.obj_value = &gc_obj_str_new(eval->gc, ((struct ast_str*)n)->value)->base }); | |
break; | |
case AST_TRN: { | |
struct ast_trn *trn = (struct ast_trn*)n; | |
eval_ast(eval, trn->cond); RETURN_IF_RET(); | |
struct value cond = eval_pop(eval); | |
if (cond.type != VALUE_INT || cond.int_value) | |
eval_ast(eval, trn->then); | |
else if (trn->other != NULL) | |
eval_ast(eval, trn->other); | |
else | |
eval_push(eval, (struct value){0}); | |
} break;; | |
case AST_UNR: switch (((struct ast_unr*)n)->type) { | |
#define UNARY_OP(OP) { \ | |
eval_ast(eval, ((struct ast_unr*)n)->opr); RETURN_IF_RET(); struct value v = eval_pop(eval); \ | |
if (v.type != VALUE_INT) { \ | |
eval_push(eval, error_value(eval, "the unary %s operator expects numbers as operands!", \ | |
ast_unr_type_to_string(((struct ast_unr*)n)->type))); \ | |
} else { \ | |
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = OP v.int_value }); \ | |
} \ | |
} | |
case AST_UNR_NOT: UNARY_OP(!); break; | |
case AST_UNR_NEG: UNARY_OP(-); break; | |
case AST_UNR_BNEG: UNARY_OP(~); break; | |
#undef UNARY_OP | |
default: | |
fprintf(stderr, "eval: unknown unr operator %d!\n", ((struct ast_unr*)n)->type); | |
exit(1); | |
} break; | |
case AST_BIN: switch (((struct ast_bin*)n)->type) { | |
case AST_BIN_ADD: { | |
eval_ast(eval, ((struct ast_bin*)n)->lhs); RETURN_IF_RET(); | |
eval_ast(eval, ((struct ast_bin*)n)->rhs); RETURN_IF_RET(eval_pop(eval)); | |
struct value rhs = eval_pop(eval); | |
struct value lhs = eval_pop(eval); | |
if (lhs.type == VALUE_INT && rhs.type == VALUE_INT) { | |
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = lhs.int_value + rhs.int_value }); | |
} else if (lhs.type == VALUE_OBJ && lhs.obj_value->type == GC_OBJ_STR && | |
rhs.type == VALUE_OBJ && rhs.obj_value->type == GC_OBJ_STR) { | |
eval_push(eval, concat_strings( | |
eval->gc, | |
(struct gc_obj_str*)lhs.obj_value, | |
(struct gc_obj_str*)rhs.obj_value)); | |
} else { | |
eval_push(eval, error_value(eval, "the + operator expects numbers or strings as operands!")); | |
} | |
} break; | |
case AST_BIN_EQL: { | |
eval_ast(eval, ((struct ast_bin*)n)->lhs); RETURN_IF_RET(); | |
eval_ast(eval, ((struct ast_bin*)n)->rhs); RETURN_IF_RET(eval_pop(eval)); | |
struct value rhs = eval_pop(eval); | |
struct value lhs = eval_pop(eval); | |
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = value_equal(lhs, rhs) }); | |
} break; | |
case AST_BIN_NEQ: { | |
eval_ast(eval, ((struct ast_bin*)n)->lhs); RETURN_IF_RET(); | |
eval_ast(eval, ((struct ast_bin*)n)->rhs); RETURN_IF_RET(eval_pop(eval)); | |
struct value rhs = eval_pop(eval); | |
struct value lhs = eval_pop(eval); | |
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = !value_equal(lhs, rhs) }); | |
} break; | |
#define BINARY_OP(OP) { \ | |
eval_ast(eval, ((struct ast_bin*)n)->lhs); RETURN_IF_RET(); \ | |
eval_ast(eval, ((struct ast_bin*)n)->rhs); RETURN_IF_RET(eval_pop(eval)); \ | |
struct value rhs = eval_pop(eval); \ | |
struct value lhs = eval_pop(eval); \ | |
if (lhs.type != VALUE_INT || rhs.type != VALUE_INT) { \ | |
eval_push(eval, error_value(eval, "the %s operator expects numbers as operands!", \ | |
ast_bin_type_to_string(((struct ast_bin*)n)->type))); \ | |
} else { \ | |
eval_push(eval, (struct value){ .type = VALUE_INT, .int_value = lhs.int_value OP rhs.int_value }); \ | |
} \ | |
} | |
case AST_BIN_SUB: BINARY_OP(-); break; | |
case AST_BIN_MUL: BINARY_OP(*); break; | |
case AST_BIN_DIV: BINARY_OP(/); break; | |
case AST_BIN_OR: BINARY_OP(||); break; | |
case AST_BIN_BOR: BINARY_OP(|); break; | |
case AST_BIN_AND: BINARY_OP(&&); break; | |
case AST_BIN_BAND: BINARY_OP(&); break; | |
case AST_BIN_SHL: BINARY_OP(<<); break; | |
case AST_BIN_SHR: BINARY_OP(>>); break; | |
case AST_BIN_MOD: BINARY_OP(%); break; | |
case AST_BIN_GTH: BINARY_OP(>); break; | |
case AST_BIN_LTH: BINARY_OP(<); break; | |
case AST_BIN_GEQ: BINARY_OP(>=); break; | |
case AST_BIN_LEQ: BINARY_OP(<=); break; | |
#undef BINARY_OP | |
default: | |
fprintf(stderr, "eval: unknown bin operator %d!\n", ((struct ast_bin*)n)->type); | |
exit(1); | |
} break; | |
case AST_FUN: { | |
struct ast_fun *fun = (struct ast_fun*)n; | |
eval_push(eval, (struct value){ .type = VALUE_OBJ, .obj_value = &gc_obj_fun_new( | |
eval->gc, | |
fun->arg_count, fun->min_arg_count, | |
fun->args, | |
eval->this, | |
fun->body, | |
NULL | |
)->base }); | |
} break; | |
case AST_RET: { | |
eval_ast(eval, ((struct ast_ret*)n)->value); RETURN_IF_RET(); struct value v = eval_pop(eval); | |
struct ret_value *retval = calloc(1, sizeof(*retval)); | |
retval->value = v; | |
retval->error = ((struct ast_ret*)n)->error; | |
struct value val = { .type = VALUE_RET, .ret_value = retval }; | |
eval_push(eval, val); | |
if (retval->error) { | |
struct callstack_entry *e = eval_push_call(eval); | |
e->call_location = n->where; | |
e->callee = val; | |
} | |
} break; | |
case AST_SET: { | |
eval_ast(eval, ((struct ast_set*)n)->value); RETURN_IF_RET(); struct value v = eval_pop(eval); | |
gc_obj_ctx_set_var(eval->this, ((struct ast_set*)n)->name, v); | |
eval_push(eval, (struct value){0}); | |
} break; | |
case AST_LET: { | |
eval_ast(eval, ((struct ast_let*)n)->value); RETURN_IF_RET(); struct value v = eval_pop(eval); | |
if (v.type == VALUE_OBJ && v.obj_value->type == GC_OBJ_FUN) { | |
struct gc_obj_fun *fun = (struct gc_obj_fun *)v.obj_value; | |
if (fun->name == NULL) { | |
fun->name = malloc(strlen(((struct ast_let*)n)->name) + 1); | |
memcpy(fun->name, ((struct ast_let*)n)->name, strlen(((struct ast_let*)n)->name) + 1); | |
} | |
} | |
gc_obj_ctx_set_new_var(eval->this, ((struct ast_let*)n)->name, v); | |
eval_push(eval, (struct value){0}); | |
} break; | |
case AST_BLK: { | |
struct ast_blk *blk = (struct ast_blk*)n; | |
struct gc_obj_ctx *old_this = eval->this; | |
eval->this = gc_obj_ctx_new(eval->gc, eval->this); | |
for (size_t i = 0; i < blk->stmt_count; ++i) { | |
eval_ast(eval, blk->stmts[i]); | |
if (eval->stack[eval->stack_size - 1].type == VALUE_RET) break; | |
if (i < blk->stmt_count - 1) eval_pop(eval); | |
} | |
eval->this = old_this; | |
} break; | |
case AST_INV: { | |
struct ast_inv *inv = (struct ast_inv*)n; | |
eval_ast(eval, inv->fun); RETURN_IF_RET(); struct value fun = eval->stack[eval->stack_size - 1]; | |
struct value *args = inv->arg_count > 0 ? calloc(inv->arg_count, sizeof(struct value)) : NULL; | |
for (size_t i = 0; i < inv->arg_count; ++i) { | |
eval_ast(eval, inv->args[i]); | |
args[i] = eval->stack[eval->stack_size - 1]; | |
if (args[i].type == VALUE_RET) { | |
for (size_t j = 0; j <= i; ++j) eval_pop(eval); // pop already pushed arguments. | |
eval_pop(eval); // pop `fun`. | |
eval_push(eval, args[i]); // push return argument. | |
free(args); | |
return; | |
} | |
} | |
struct value res = eval_invoke(eval, fun, inv->arg_count, args, &n->where); | |
for (size_t i = 0; i < inv->arg_count; ++i) eval_pop(eval); // pop arguments. | |
if(inv->arg_count > 0) free(args); | |
eval_pop(eval); // pop `fun`. | |
eval_push(eval, res); | |
} break; | |
case AST_ARR: { | |
struct ast_arr *arr = (struct ast_arr*)n; | |
struct gc_obj_arr *obj = gc_obj_arr_new_empty(eval->gc); | |
for (size_t i = 0; i < arr->elem_count; ++i) { | |
eval_ast(eval, arr->elems[i]); RETURN_IF_RET(); | |
gc_obj_arr_push(obj, eval->stack[eval->stack_size - 1]); | |
eval_pop(eval); | |
} | |
eval_push(eval, (struct value){ .type = VALUE_OBJ, .obj_value = &obj->base }); | |
} break; | |
default: | |
fprintf(stderr, "eval: unknown ast type: %d!\n", n->type); | |
abort(); | |
// exit(1); | |
} | |
} | |
static void wse_std_print(struct eval_context *ctx, FILE *fout, size_t arg_count, struct value *args) { | |
struct file_outpipe foutp = file_outpipe_of(fout); | |
for (size_t i = 0; i < arg_count; ++i) { | |
print_value(args[i], &foutp.base, PRINT_FLAG_COLOR); | |
if (i != arg_count - 1) foutp.base.putc(&foutp.base, ' '); | |
} | |
foutp.base.putc(&foutp.base, '\n'); | |
} | |
static struct gc_obj_base *wse_std_tostring(struct eval_context *ctx, struct value val) { | |
struct str_outpipe pipe; | |
str_outpipe_init(&pipe); | |
print_value(val, &pipe.base, 0); | |
struct gc_obj_str *res = gc_obj_str_new(ctx->gc, pipe.str); | |
str_outpipe_free(&pipe); | |
return &res->base; | |
} | |
struct value wse_std_tostring_bind(struct eval_context *ctx, size_t arg_count, struct value *args) { | |
if (arg_count != 1) { | |
return error_value(ctx, "tostring expects exactly one argument (%zu given)!\n", arg_count); | |
} | |
return (struct value){ .type = VALUE_OBJ, .obj_value = wse_std_tostring(ctx, args[0]) }; | |
} | |
struct value wse_std_print_bind(struct eval_context *ctx, size_t arg_count, struct value *args) { | |
wse_std_print(ctx, stdout, arg_count, args); | |
return (struct value){0}; | |
} | |
struct value wse_std_fprint_bind(struct eval_context *ctx, size_t arg_count, struct value *args) { | |
if (arg_count == 0) { | |
return error_value(ctx, "fprint expects at least one argument (the file)!\n"); | |
} | |
if (args[0].type != VALUE_DAT) { | |
return error_value(ctx, "fprint expects the first argument to be a file pointer!\n"); | |
} | |
wse_std_print(ctx, args[0].dat_value, arg_count - 1, args + 1); | |
return (struct value){0}; | |
} | |
struct value wse_std_arrinsert_bind(struct eval_context *ctx, size_t arg_count, struct value *args) { | |
if (arg_count < 2 || arg_count > 3) { | |
return error_value(ctx, "arrinsert expected at 2 or 3 arguments (array, item and index)"); | |
} | |
if (args[0].type != VALUE_OBJ || args[0].obj_value->type != GC_OBJ_ARR) { | |
return error_value(ctx, "arrinsert expects the first argument to be an array!\n"); | |
} | |
struct gc_obj_arr *arr = (struct gc_obj_arr*)args[0].obj_value; | |
size_t index = arr->count; | |
if (arg_count == 3) { | |
if (args[2].type != VALUE_INT) { | |
return error_value(ctx, "arrinsert expects the third argument to be an integer!\n"); | |
} | |
struct value val = convert_index(ctx, args[2].int_value, arr->count); | |
if (val.type == VALUE_RET) return val; | |
index = val.int_value; | |
} | |
size_t ret = gc_obj_arr_insert(arr, args[1], index); | |
return (struct value){ .type = VALUE_INT, .int_value = ret }; | |
} | |
struct value wse_std_arrdelete_bind(struct eval_context *ctx, size_t arg_count, struct value *args) { | |
if (arg_count < 1 || arg_count > 2) { | |
return error_value(ctx, "arrdelete expected 1-2 arguments (array and index)"); | |
} | |
if (args[0].type != VALUE_OBJ || args[0].obj_value->type != GC_OBJ_ARR) { | |
return error_value(ctx, "arrdelete expects the first argument to be an array!\n"); | |
} | |
struct gc_obj_arr *arr = (struct gc_obj_arr*)args[0].obj_value; | |
if (arr->count <= 0) { | |
return error_value(ctx, "cannot arrdelete from empty array."); | |
} | |
size_t index = arr->count - 1; | |
if (arg_count == 2) { | |
if (args[1].type != VALUE_INT) { | |
return error_value(ctx, "arrdelete expects the third argument to be an integer!\n"); | |
} | |
struct value val = convert_index(ctx, args[1].int_value, arr->count); | |
if (val.type == VALUE_RET) return val; | |
index = val.int_value; | |
} | |
size_t ret = gc_obj_arr_delete((struct gc_obj_arr*)args[0].obj_value, args[1], index); | |
return (struct value){ .type = VALUE_INT, .int_value = ret }; | |
} | |
struct value wse_std_countof_bind(struct eval_context *ctx, size_t arg_count, struct value *args) { | |
if (arg_count != 1) { | |
return error_value(ctx, "countof expects one argument (container)"); | |
} | |
if (args[0].type != VALUE_OBJ) { | |
return error_value(ctx, "countof expects the argument to be an array, a map or a context!\n"); | |
} | |
if (args[0].obj_value->type == GC_OBJ_ARR) { | |
return (struct value){ .type = VALUE_INT, .int_value = ((struct gc_obj_arr*)args[0].obj_value)->count }; | |
} | |
if (args[0].obj_value->type == GC_OBJ_CTX) { | |
return (struct value){ .type = VALUE_INT, .int_value = ((struct gc_obj_ctx*)args[0].obj_value)->var_count }; | |
} | |
return error_value(ctx, "countof: not implemented or argument not a container!\n"); | |
} | |
struct value wse_std_parentof_bind(struct eval_context *ctx, size_t arg_count, struct value *args) { | |
if (arg_count != 1) { | |
return error_value(ctx, "parentof expects one argument (context)"); | |
} | |
if (args[0].type != VALUE_OBJ || args[0].obj_value->type != GC_OBJ_CTX) { | |
return error_value(ctx, "parentof expects the argument to be a context!\n"); | |
} | |
return (struct value){ .type = VALUE_OBJ, .obj_value = &((struct gc_obj_ctx*)args[0].obj_value)->parent->base }; | |
} | |
struct value wse_std_keysof_bind(struct eval_context *ctx, size_t arg_count, struct value *args) { | |
if (arg_count != 1) { | |
return error_value(ctx, "keysof expects one argument (container)"); | |
} | |
if (args[0].type != VALUE_OBJ) { | |
return error_value(ctx, "keysof expects the argument to be an array, a map or a context!\n"); | |
} | |
// if (args[0].obj_value->type == GC_OBJ_ARR) { | |
// return (struct value){ .type = VALUE_INT, .int_value = ((struct gc_obj_arr*)args[0].obj_value)->count }; | |
// } | |
// if (args[0].obj_value->type == GC_OBJ_CTX) { | |
// return (struct value){ .type = VALUE_INT, .int_value = ((struct gc_obj_ctx*)args[0].obj_value)->var_count }; | |
// } | |
return error_value(ctx, "keysof: not implemented or argument not a container!\n"); | |
} | |
static const char *just_fname_(const char *path) { | |
const char *chunk = path; | |
while (1) { // while 1? idk if this is safe | |
const char *s = chunk; | |
for (; *s != '/'; ++s) { | |
if (*s == '\0') return chunk; | |
} | |
chunk = s + 1; | |
} | |
return chunk; // unreachable? UB lets go | |
} | |
static void init_wse_std(struct gc_obj_ctx *ctx) { | |
#define WHERE_C (struct where){ .fname = just_fname_(__FILE__), .line = __LINE__, .col = 1 } | |
#define STDFUNC(NAME) (struct value){ .type = VALUE_EXT, .ext_value = { wse_std_##NAME##_bind, #NAME, WHERE_C } } | |
#define SETSTDFUNC(NAME) gc_obj_ctx_set_new_var(ctx, #NAME, STDFUNC(NAME)) | |
gc_obj_ctx_set_new_var(ctx, "nil", (struct value){ .type = VALUE_NIL }); | |
SETSTDFUNC(tostring); | |
SETSTDFUNC(print); | |
SETSTDFUNC(fprint); | |
gc_obj_ctx_set_new_var(ctx, "stdout", (struct value){ .type = VALUE_DAT, .dat_value = stdout }); | |
gc_obj_ctx_set_new_var(ctx, "stderr", (struct value){ .type = VALUE_DAT, .dat_value = stderr }); | |
gc_obj_ctx_set_new_var(ctx, "stdin", (struct value){ .type = VALUE_DAT, .dat_value = stdin }); | |
SETSTDFUNC(arrinsert); | |
SETSTDFUNC(arrdelete); | |
SETSTDFUNC(countof); | |
SETSTDFUNC(parentof); | |
SETSTDFUNC(keysof); | |
#undef SETSTDFUNC | |
#undef STDFUNC | |
#undef WHERE_C | |
} | |
int run_file(struct eval_context *ectx, const char *path) { | |
struct lexer lex = {0}; | |
{ | |
FILE *fin = fopen("main.wse", "r"); | |
if (fin == NULL) { | |
return perror("failed to open file"), 1; | |
} | |
lexer_add_from(&lex, fin, "main.wse"); | |
fclose(fin); | |
} | |
// for (size_t i = 0; i < lex.tok_count; ++i) { | |
// print_where(lex.toks[i].where, stdout); | |
// fputc('\t', stdout); | |
// print_token(&lex, &lex.toks[i], stdout); | |
// } | |
struct parser pars = { .lex = &lex }; | |
int ret = 0; | |
if (lex.toks->type != TOKEN_EOF) { | |
struct err_asts r = parse_stmts(&pars, lex.toks); | |
if (ERR_AST_IS_ERR(r)) { | |
print_where(r.rest->where, stderr); | |
fprintf(stderr, "\t\033[1;31mError\033[m: %s\n", r.error); | |
print_where(r.rest->where, stdout); | |
printf("\tgot: "); | |
print_token(&lex, r.rest, stdout); | |
ret = 1; | |
} else if (r.rest && r.rest->type != TOKEN_EOF) { | |
print_where(r.rest->where, stderr); | |
fprintf(stderr, "\t\033[1;31mError\033[m: expected end of input\n"); | |
ret = 1; | |
} else { | |
for (size_t i = 0; i < r.ast_count; ++i) { | |
eval_ast(ectx, r.asts[i]); | |
// we can safely free the ast since functions | |
// (the only things that reference asts) | |
// copy their code. (we also dont waste memory) | |
free_ast(r.asts[i]); | |
r.asts[i] = NULL; | |
if (ectx->stack[ectx->stack_size - 1].type == VALUE_RET) break; | |
} | |
} | |
for (size_t i = 0; i < r.ast_count; ++i) { | |
if (r.asts[i] != NULL) free_ast(r.asts[i]); | |
} | |
if (r.ast_count > 0) free(r.asts); | |
} | |
lexer_free(&lex); | |
return ret; | |
} | |
void print_stacktrace(struct eval_context *eval) { | |
struct file_outpipe foutp = file_outpipe_of(stderr); | |
fprintf(stderr, "\033[1mStacktrace\033[m:\n"); | |
for (size_t i = eval->callstack_size; i --> 0; ) { | |
fprintf(stderr, " \033[1mat\033[m: "); | |
print_where(eval->callstack[i].call_location, stderr); | |
fputs("\t", stderr); | |
print_value(eval->callstack[i].callee, &foutp.base, PRINT_FLAG_COLOR | PRINT_FLAG_STRUCT); | |
fputs("\n", stderr); | |
} | |
fprintf(stderr, " \033[1m-- entry --\033[m\n"); | |
} | |
int run_file_full(const char *path) { | |
struct gc gc = {0}; | |
struct gc_obj_ctx *globals = gc_obj_ctx_new(&gc, NULL); | |
init_wse_std(globals); | |
struct eval_context ectx = { | |
.gc = &gc, .this = globals, | |
.stack_capacity = 0, .stack_size = 0, .stack = NULL | |
}; | |
int ret = 0; | |
ret = run_file(&ectx, path); | |
if (ectx.stack_size > 0 && ectx.stack[ectx.stack_size - 1].type == VALUE_RET) { | |
if (ectx.stack[ectx.stack_size - 1].ret_value->error) { | |
struct file_outpipe foutp = file_outpipe_of(stderr); | |
print_stacktrace(&ectx); | |
fprintf(stderr, "\033[1;31mError\033[m: "); | |
print_value(ectx.stack[ectx.stack_size - 1].ret_value->value, &foutp.base, PRINT_FLAG_COLOR); | |
fprintf(stderr, "\n"); | |
ret = 1; | |
} | |
free(ectx.stack[ectx.stack_size - 1].ret_value); | |
} | |
eval_free(&ectx); | |
gc_sweepall(&gc); | |
gc_free(&gc); | |
return ret; | |
} | |
int cli(int argc, char *argv[]) { | |
if (argc < 2) { | |
fprintf(stderr, "usage:\n\t%s <fname>\n", argv[0]); | |
return 1; | |
} | |
return run_file_full(argv[1]); | |
} | |
int main(int argc, char *argv[]) { | |
return cli(argc, argv); | |
} |
Syntax:
The syntax is a combination of javascript, a little bit of go and some rust. [This comment isn't finished yet.]
Comments
Comments start with #
and span the whole line. (they only work outside of strings and other literals like that)
Basic values
There are:
- Strings:
Octal escapes arent supported yet.
"This is a string" # Escapes are the same as in C "\n" # - Newline, ASCII 10 "\t" # - Tab, ASCII 9 "\r" # - Carriage Return, ASCII 13 "\0" # - Nul, ASCII 0 "\a" # - Bell, ASCII 7 "\e" # - Escape, ASCII 27 "\"" # - Literal ", ASCII 34 "\'" # - Literal ', ASCII 39 "\\" # - Literal \, ASCII 92
- Integers:
1938291 # No hexadecimal and octal for now. 1_938_291 # You can add _ for clarity. They are ignored. 1___213_ # This is valid. (the same as 1213)
Syntax sugar:
if
if condition { ... };
if condition { ... } else { ... };
Is equivalent to (but allowed only in statements)
condition ? { ... };
condition ? { ... } : { ... };
return
return value
Is equivalent to (but allowed only in statements)
=> value;
compound assignments
a += 3;
b <<= 4;
Is equivalent to (including other operators of course)
a = a + 3;
b = b << 4;
function arguments
func(a, b) x => { ... };
func() (x, y) => { ... };
Is equivalent to
func(a, b, x => { ... });
func((x, y) => { ... });
Example programs:
Hello World
print("Hello, World!");
Hello World with functions
(x => # function taking one argument. multiple arguments with comma and parens
fprint(stderr, "Hello, " + tostring(x))
)(12); # invoke the function immediately
Functions, branches and returns
(x => fprint(stderr, "Hello, " + tostring(x)))((() => { # code block in { ... }
a := 15; # new variable, local to this function (block)
b := 1; # another variable
a = a + b;
a == 16 ? { # ternary operator (if statements are syntax sugar) 'cond ? then : else'
b = b + a;
=> b # return from *function* (not block) with value
}; # false branch optional
1; # returned from block
})());
Same as above, but with syntax sugar
(x => fprint(stderr, "Hello, " + tostring(x)))((() => {
a := 15;
b := 1;
a += b;
if a == 16 {
b += a;
return b;
};
1;
})());
Stuff
assert := (e, msg) => !e ? { =!> "assertion error: " + tostring(msg); };
unless := (cond, func) => !cond ? func(cond);
partial := (arg1, func) => arg2 => func(arg1, arg2);
say_hi_to := name => fprint(stderr, "Hello, " + tostring(name));
with := (o, f) => {
fprint(stderr, "begin with " + tostring(o));
r := f(o);
fprint(stderr, "end with " + tostring(o));
return r;
};
r := with("coinconclusive") name => {
say_hi_to(name);
unless(1 == 2) _ => print("universe normal");
return 2;
};
print(r);
add12 := partial(12) (a, b) => a + b;
print(add12(6));
Iterators
default := (x, d) => x == nil ? d : x;
range := (a, b, ? s) => {
i := a;
j := a;
s = default(s, 1);
() => { j = i; i += s; j == b ? nil : j }
};
each := (it, fn) => {
loop := v => v != nil ? { fn(v); loop(it()) };
loop(it())
};
map := (it, fn) => {
i := 0; j := 0; v := nil;
() => { j = i; i + 1; v = it(); v == nil ? nil : fn(v, j) }
};
fold := (it, fn, ? start) => {
r := start == nil ? it() : start;
if r == nil { return nil };
loop := (a, b) => b == nil ? a : loop(fn(a, b), it());
loop(r, it())
};
arrayof := it => {
arr := [];
each(it) e => arrinsert(arr, e);
arr
};
assert := (e, msg) => {
if !e {
=!> "Assertion failed: " + tostring(msg);
}
};
print(fold(map(range(1, 4)) e => e * e) (a, b) => a + b);
TODO:
See also the TODO
s in the code.
- add error return (exceptions) support
- convert eval errors to error returns
- add error return syntax (
=!> value
) - add
if
syntax sugar - add
return
syntax sugar - allow
}
or EOI after;
in block/top-level statements - add block function argument sugar (
f(1, 2) |x| { ... }
isf(1, 2, |x| { ...} )
) - combined assign operators (
+=
, etc) - change function syntax to use arrows (
(args, ...) => body
) - make sure gc actually works
- array literals (
[a, b, c]
) - get/set item from array (
arr(index)
to get,arr(index, value)
to set) - array builtins (
newsize =
arrpush
arrinsert(arr, value[, index])
,oldvalue =
arrpop
arrdelete(arr[, index])
) - pipelines (
o |> f(a, b) |> g(x) a => a + 1
isg(f(o, a, b), a => a + 1)
) - io builtins (
file = iofopen(fname[, mode])
,iofclose(file)
,data = iofread(file[, n])
) - os builtins (
osargs
name?,osenv(name[, value])
name?,osexit([code])
) - map literals (
{ "a": b, c: d }
) - get/set item for map (
map(key[, value])
) - map builtins (
oldvalue = mapremove(key)
) - context object creation (
new([parent])
) - field access operator for context objects (
ctx.var
) - field setting for context objects (
ctx.var = val
) (parser note: finally use the error case) - cli
- modules (
mod = import(modpath)
,mod
is a context object) - handle recursive imports (module cache)
- builtins module?
- methods?
|f, x| |*args| f(x, *args)
? - varargs (
|a, *args| f(a, *args)
) - stdlib (stuffs)
Code
Some insight about the interpreter code itself. This isn't done.
QoL
-
Safe nullable pointer offset. (
NULL + x
is UB)static inline const char *offset_nullable_ptr_safe(const char *ptr, size_t offset) { return ptr == NULL ? ptr : ptr + offset; }
-
Allocate memory for a type.
#define MEMALLOC(T) ((T*)calloc(1, sizeof(T)))
Semantic naming
Idk how to call this properly, but basically:
Structs
- "base class" struct
struct x_base { enum x_type type; ... };
- "sub class" struct
struct x_y { struct x_base base; ... }
Functions
- New "sub class" struct. Allocates the struct in some way.
struct x_y *x_y_new([allocator, ]...) { struct x_y *x = MEMALLOC(struct x_y); }
- Free a pointer to a "sub class" struct. Does
free(x)
void free_x(struct x_base *x) { if (x == NULL) { fprintf(stderr, "free_x(NULL)!\n"); return; } switch (x->type) { ... default: fprintf(stderr, "free: unknown x type: %d!\n", x->type); break; } free(x); }
- Free internal struct fields
void x_free(struct x *x) { ... }
Common patterns
Array Free
When freeing arrays, this is how it usually happens
// free individual items
for (size_t i = 0; i < x_count; ++i) {
free_x(xs[i]); // or
x_free(&xs[i]); // or
free(xs[i]);
}
// free the whole array if it exists
// (the array is NULL when there are no items)
if (x_count > 0) free(xs);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Compiling:
Tested only with clang on arch linux. Should work on gcc but im not sure if
__builtin_unreachable
exists there. (TODO: make it compiler-independent)Debug
Release
Running:
To run, simply execute the generated executable. If debugging and running unmodified code and if the sanitizer reports something, please comment (with the code that triggered the error)!