Skip to content

Instantly share code, notes, and snippets.

@coinconclusive
Created June 22, 2023 13:50
Show Gist options
  • Save coinconclusive/5df19b63009ac9b9b0f2ec68dda6035b to your computer and use it in GitHub Desktop.
Save coinconclusive/5df19b63009ac9b9b0f2ec68dda6035b to your computer and use it in GitHub Desktop.
WIP (planned to be) selfhosting c subset compiler
#include <signal.h> /* raise, SIGTRAP (in fail, for debugging) */
#include <stdio.h> /* fprintf, stderr (in _DBG, for debugging) */
#include <unistd.h> /* write, read, _exit */
#include <fcntl.h> /* open */
#define _DBG(...) fprintf(stderr,__VA_ARGS__)
#define _DBGfn _DBG("%s:%d",__FILE__,__LINE__)
#define _TN ((char*[]){"EOF","NAM","INT"})[tok]
#define _pS(S) _DBG("%s\n",S)
#define _pD(S) _DBG("%d\n",S)
#define _pC(S) _DBG("'%c'\n",S)
#define _pTTn (_DBG("%s:",_TN),tok==1?_pS(toknam):tok==2?_pD(tokint):0)
#define _pTT {_DBGfn;_DBG(" tt: ");tok<32?tok<3?_pTTn:_pD(tok):_pC(tok);}
void zstrcp(char *dst, char *src) {
while (*src) { *dst = *src; dst += 1; src += 1; }
*dst = *src;
}
int zstreq(char *a, char *b) {
while (1) {
if (*a != *b) return 0;
if (*a == 0 || *b == 0) return *a == *b;
a += 1; b += 1;
}
}
int zstrsz(char *s) {
int i;
i = 0;
while (*s) { i += 1; s += 1; }
return i;
}
void writec(int fd, int c) { write(fd, &c, 1); }
void writez(int fd, char *s) {
write(fd, s, zstrsz(s));
}
void writei(int fd, int n) {
if (n < 0) { writec(fd, '-'); return writei(fd, -n); }
if (n / 10) writei(fd, n / 10);
writec(fd, n % 10 + '0');
}
char *ifname;
int ilineno, icolmn;
void fail(char *msg) {
writez(2, msg);
writez(2, "\nfail @ ");
writez(2, ifname);
writec(2, ':');
writei(2, ilineno);
writec(2, ':');
writei(2, icolmn);
writec(2, '\n');
/* raise(SIGTRAP); */
_exit(1);
}
int ifd;
int ofd;
enum { TTEOF, TTNAM, TTINT };
enum { VARFLG_GLOBAL = 1, VARFLG_EXTERN = 2, VARFLG_STATIC = 4 };
enum { PTRSZ = 4, INTSZ = 4 };
enum { FEOF = -1 };
int tokp;
enum { MAXTOKNAMSZ = 16 };
int tok;
char toknam[16];
int tokint;
int nstr;
enum { MAXSTRSZ = 32 };
char strtab[8192]; /* 32x256. */
int readcc;
int readc(int fd) {
if (read(fd, &readcc, 1) == 0) readcc = FEOF;
if (readcc == '\n') { ilineno += 1; icolmn = 0; }
icolmn += 1;
return readcc;
}
/* vars are of the form Type{sizeof=esz} *{n=ptr}{name}[{asz}] */
int nvar; /* number of vars. */
enum { MAXVARNAMSZ = 16 };
char varnamtab[4096]; /* 16x256. zstr names. */
char varptrtab[256]; /* level of pointer inderection. 0 for none. */
char varesztab[256]; /* element size. */
int varasztab[256]; /* array size. 0 for not an array. */
int varofftab[256]; /* offset for locals, value for constants. */
char varflgtab[256]; /* flags. */
int getvarmemsz(int esz, int asz, int ptr) {
if (asz > 0) return getvarmemsz(esz, 0, ptr) * asz;
if (ptr > 0) return PTRSZ;
return esz;
}
int getvarmemszi(int i) {
return getvarmemsz(varesztab[i], varasztab[i], varptrtab[i]);
}
int getelmmemsz(int esz, int asz, int ptr) {
if (asz > 0) return getvarmemsz(esz, 0, ptr);
if (ptr > 0) return getvarmemsz(esz, 0, ptr - 1);
return esz;
}
int getvarsz(int esz, int asz, int ptr) {
if (asz > 0) return PTRSZ;
if (ptr > 0) return PTRSZ;
return esz;
}
int getvarszi(int i) {
return getvarsz(varesztab[i], varasztab[i], varptrtab[i]);
}
int getvarpmul(int esz, int asz, int ptr) {
if (asz > 0) return getvarsz(esz, 0, ptr);
if (ptr > 0) return getvarsz(esz, 0, ptr - 1);
return 1;
}
int getvarpmuli(int i) {
return getvarpmul(varesztab[i], varasztab[i], varptrtab[i]);
}
int findvar(char *name) {
int findidx;
findidx = 0;
while (findidx < nvar) {
if (zstreq(varnamtab + findidx * 16, name))
return findidx;
findidx += 1;
}
writez(2, "no var '");
writez(2, name);
writez(2, "'!\n");
fail("no var");
return -1;
}
int isid(int c) {
return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
int isws(int c) { return c == ' ' || c == '\n' || c == '\t' || c == '\r'; }
int tread(char *otoknam, int *otokint) {
int readcc1;
while (isws(readcc)) readc(ifd);
if (readcc == FEOF) return TTEOF;
while (readcc == '#') while (readcc != '\n' || readcc != FEOF) readc(ifd);
while (readcc == '/') {
readc(ifd);
if (readcc == '/') {
while (readcc != '\n' || readcc != FEOF) readc(ifd);
} else if (readcc == '*') {
readc(ifd);
while (1) {
if (readcc == '*') { readc(ifd); if (readcc == '/') break; }
if (readcc == FEOF) return TTEOF;
}
readc(ifd);
} else return '/';
while (isws(readcc)) readc(ifd);
while (readcc == '#') while (readcc != '\n' || readcc != FEOF) readc(ifd);
if (readcc == FEOF) return TTEOF;
}
if (readcc >= '0' && readcc <= '9') {
*otokint = 0;
while (readcc >= '0' && readcc <= '9') {
*otokint = *otokint * 10 + readcc - '0';
readc(ifd);
if (readcc == FEOF) break;
}
return TTINT;
}
if (isid(readcc)) {
*otoknam = 0;
while (isid(readcc)) {
*otoknam = readcc;
otoknam += 1;
*otoknam = 0;
readc(ifd);
if (readcc == FEOF) break;
}
return TTNAM;
}
readcc1 = readcc;
readc(ifd);
return readcc1;
}
void tpeek() {
if (tokp) return;
tokp = 1;
tok = tread(toknam, &tokint);
}
void tnext() {
if (tokp) {
tokp = 0;
return;
}
tok = tread(toknam, &tokint);
}
void wnl() { writec(ofd, '\n'); }
void wslab(int n) { writez(ofd, "_S"); writei(ofd, n); }
void wlab(int n) { writez(ofd, ".L"); writei(ofd, n); }
void wlabdefn() { writez(ofd, ":\n"); }
int nslab;
int wnslab() { wslab(nslab); wlabdefn(); nslab += 1; return nslab - 1; }
int nlab;
int wnlab() { wslab(nlab); wlabdefn(); nlab += 1; return nlab - 1; }
void wjmpl(int l) { writez(ofd, "\tjmp "); wlab(l); wnl(); }
void wab() { writez(ofd, " eax,ecx"); }
void wmov() { writez(ofd, "\tmov "); }
void wmova() { wmov(); writez(ofd, "eax,"); }
void wpusha() { writez(ofd, "\tpush eax\n"); }
void wpopa() { writez(ofd, "\tpop eax\n"); }
void wpushc() { writez(ofd, "\tpush ecx\n"); }
void wpopc() { writez(ofd, "\tpop ecx\n"); }
void wmovca() { wmov(); writez(ofd, "ecx,eax\n"); }
void wmovba() { wmov(); writez(ofd, "ebx,eax\n"); }
void wderefsz(int sz) {
if (sz == 4) writez(ofd, "dword");
if (sz == 2) writez(ofd, "word");
if (sz == 1) writez(ofd, "byte");
}
void wsza(int sz) {
if (sz == 4) writez(ofd, "eax");
if (sz == 2) writez(ofd, "ax");
if (sz == 1) writez(ofd, "al");
}
void wderefn(int sz, char *nam) {
wderefsz(sz);
writec(ofd, '[');
writez(ofd, nam);
writec(ofd, ']');
}
void wderefo(int sz, int off) {
if (off == 0) return wderefn(sz, "ebp");
wderefsz(sz);
writez(ofd, "[ebp");
if (off > 0) writec(ofd, '+');
writei(ofd, off);
writec(ofd, ']');
}
int pexp(int *ptr);
/* toknam - function name. tok(peek) == '(' */
int pfcall(int *ptr) {
return INTSZ;
}
/* pprimary.varset [private] */
int pprimaryvarset(int i, int esz, int *ptr) {
char nam[16];
zstrcp(nam, toknam);
tnext();
pexp(ptr);
wmov();
if (varflgtab[i] & VARFLG_GLOBAL) {
if (varasztab[i] > 0) fail("cannot assign arrays");
wderefn(esz, nam);
} else {
wderefo(esz, varofftab[i]);
}
writec(ofd, ',');
wsza(esz);
wnl();
return esz;
}
int pprimary(int *ptr, int lval) {
int i; int esz;
tnext();
if (tok == TTINT) {
if (tokint != 0) {
wmova();
writei(ofd, tokint);
} else {
writez(ofd, "\txor eax,eax");
}
wnl();
*ptr = 0;
return 1;
}
if (tok == TTNAM) {
i = findvar(toknam);
esz = getvarszi(i);
tpeek();
if (tok == '(') return pfcall(ptr);
_pTT;
if (lval && tok == '=') return pprimaryvarset(i, esz, ptr);
wmov();
wsza(esz);
writec(ofd, ',');
if (varflgtab[i] & VARFLG_GLOBAL) {
if (varasztab[i] > 0) writez(ofd, toknam);
else wderefn(esz, toknam);
} else {
wderefo(esz, varofftab[i]);
}
wnl();
*ptr = varptrtab[i];
return getvarpmuli(i);
}
if (tok == '(') {
esz = pexp(ptr);
tnext();
if (tok != ')') fail("need )");
return esz;
}
fail("need name, int or ( for atom");
return 0;
}
int plvalprimary(int *ptr) {
int i; int esz;
tnext();
if (tok == TTNAM) {
i = findvar(toknam);
esz = getvarszi(i);
if (varflgtab[i] & VARFLG_GLOBAL) {
if (varasztab[i] > 0) fail("arrays arent lvalues");
wmova(); writez(ofd, toknam);
} else {
writez(ofd, "\tlea eax,");
wderefo(esz, varofftab[i]);
}
wnl();
*ptr = varptrtab[i];
return getvarpmuli(i);
}
if (tok == '(') {
esz = plvalprimary(ptr);
tnext();
if (tok != ')') fail("need )");
return esz;
}
fail("need name or ( for lvalue");
return esz;
}
void wptraddac(int esz) {
writez(ofd, "\tmul ecx,"); writei(ofd, esz); wnl();
writez(ofd, "\tadd eax,ecx"); wnl();
}
int plvalpostfix(int *ptr) {
int esz; int ptr2;
esz = plvalprimary(ptr);
tpeek();
if (tok == '[') {
tnext();
wpusha();
pexp(&ptr2);
if (ptr2 > 0) fail("cant index with ptr");
tnext();
if (tok != ']') fail("need ]");
wmovca();
wpopa();
wptraddac(esz);
/* todo: multiple postfix []? */
}
return esz;
}
int ppostfix(int *ptr, int lval) {
int esz;
esz = pprimary(ptr, lval);
return esz;
}
int punary(int *ptr, int lval) {
int esz; int ptr2;
tpeek();
if (tok == '&') {
tnext();
plvalpostfix(ptr);
*ptr = *ptr + 1;
/*wderefn(4, "eax");*/
return esz;
}
if (tok == '*') {
tnext();
esz = punary(ptr, 0);
if (*ptr < 1) fail("cant deref a non-ptr");
tpeek();
if (tok == '=') {
tnext();
wmovca();
wpushc();
pexp(&ptr2);
if (ptr2 != *ptr - 1) {
writei(2, *ptr); writez(2, " != ");
writei(2, ptr2);
fail(" - ptr num assign mismatch");
}
wpopc();
wmov(); wderefn(4, "ecx"); writec(ofd, ','); wsza(esz); wnl();
} else {
wmova();
wderefn(4, "eax");
wnl();
}
*ptr = *ptr - 1;
return esz;
}
return ppostfix(ptr, lval);
}
int pmul(int *ptr, int lval) {
int o; int esz;
esz = punary(ptr, lval);
tpeek();
while (tok == '*' || tok == '/' || tok == '%') {
if (esz > 1) fail("cant mul/div/mod ptr");
tnext();
o = tok;
wpusha();
if (punary(ptr, 0) > 1) fail("cant mul/div/mod by ptr");
wmovca();
wpopa();
if (o == '*') {
writez(ofd, "\timul");
wab();
} else {
writez(ofd, "\tcdq\n");
writez(ofd, "\tidiv ecx");
if (o == '%') writez(ofd, "\n\tmov eax,edx");
}
wnl();
}
return esz;
}
int padd(int *ptr, int lval) {
int o; int esz;
esz = pmul(ptr, lval);
tpeek();
while (tok == '+' || tok == '-') {
tnext();
o = tok;
wpusha();
if (pmul(ptr, 0) > 1) fail("cant add/sub two ptrs");
if (esz > 1) {
writez(ofd, "mul ecx,");
writei(ofd, esz);
wnl();
}
wmovca();
wpopa();
if (o == '+') writez(ofd, "\tadd");
if (o == '-') writez(ofd, "\tsub");
wab();
wnl();
}
return esz;
}
void pcmpeq(int *ptr, int lval) {
int o;
pmul(ptr, lval);
tpeek();
while (tok == '>' || tok == '<') {
tnext();
o = tok;
wpusha();
pmul(ptr, 0);
wmovca();
wpopa();
if (o == '+') writez(ofd, "\tadd");
if (o == '-') writez(ofd, "\tsub");
wab();
wnl();
}
}
int pexp(int *ptr) {
return padd(ptr, 1);
}
void pfunc() {
}
void penum() {
char enam[16];
int val;
val = 0;
tnext();
tnext();
if (tok != TTNAM) fail("need name for enum");
zstrcp(enam, toknam);
tpeek(); if (tok != '{') fail("need { for enum");
do {
tnext();
if (tok != TTNAM) fail("need name for enum");
tnext();
if (tok == '=') {
tnext();
if (tok != TTINT) fail("need value for enum item");
val = tokint;
}
} while(tok == ',');
tnext(); if (tok != '}') fail("need } for enum");
tnext(); if (tok != ';') fail("need ; for enum");
}
int istypnam() {
if (zstreq(toknam, "int")) return INTSZ;
else if (zstreq(toknam, "char")) return 1;
return 0;
}
int ptype(int *ptr) {
int esz;
*ptr = 0;
tnext();
if ((esz = istypnam()) == 0) {
writez(2, toknam);
fail(": bad typename");
}
tpeek();
while (tok == '*') {
tnext();
*ptr += 1;
tpeek();
}
return esz;
}
/* needs toknam - variable name and tok == '[' || tok == ';' */
void pvardecl(int esz, int ptr, int flg) {
int asz;
asz = 0;
if (tok == '[') {
tnext();
if (tok != TTINT) fail("need array size for var decl");
asz = tokint;
tnext();
if (tok != ']') fail("need ] for var decl");
tnext();
}
varasztab[nvar] = asz;
varesztab[nvar] = esz;
varptrtab[nvar] = ptr;
varofftab[nvar] = 0;
varflgtab[nvar] = flg;
zstrcp(varnamtab + nvar * MAXVARNAMSZ, toknam);
nvar += 1;
if (tok != ';') fail("need ; for var decl");
}
void pstmt() {
int dummy;
tpeek(); if (tok == ';') return tnext();
if (tok == TTNAM) {
if (zstreq(toknam, "return")) {
tnext(); tpeek();
if (tok != ';') {
pexp(&dummy);
tpeek();
if (tok != ';') fail("need ; for return");
}
tnext();
wjmpl(0);
return;
}
}
pexp(&dummy);
/* _pTT fail("need return for stmt"); */
}
/* needs toknam - variable name and tok == '(' */
void pfundecl() {
int lastnvar;
int argptr; int argesz; int argoff;
int localoff;
argoff = 0;
writez(ofd, "global "); writez(ofd, toknam); wnl();
writez(ofd, toknam); wlabdefn();
writez(ofd, "\tpush ebp\n");
writez(ofd, "\tmov ebp,esp\n");
lastnvar = nvar;
nlab = 1;
tpeek();
if (tok != ')') {
do {
argesz = ptype(&argptr);
tnext();
if (tok != TTNAM) fail("need name for arg");
varasztab[nvar] = 0;
varesztab[nvar] = argesz;
varptrtab[nvar] = argptr;
varofftab[nvar] = argoff;
zstrcp(varnamtab + nvar * MAXVARNAMSZ, toknam);
argoff += getvarszi(nvar);
tnext();
} while (tok == ',');
if (tok != ')') fail("need ) for arg list in fdecl");
} else {
tnext();
}
tpeek();
if (tok == ';') {
nvar = lastnvar;
return;
}
if (tok != '{') fail("need { or ; for fdecl");
tnext();
while (tok != '}' && tok != TTEOF) {
pstmt();
tpeek();
}
tnext();
if (tok != '}') fail("need } for fdefn");
wlab(0); wlabdefn();
writez(ofd, "\tleave\n\tret\n");
nvar = lastnvar;
}
/* toplevel */
void ptoplvl() {
int et; int pt; int varflg;
tpeek();
if (tok == TTEOF) return;
if (tok == TTNAM && zstreq(toknam, "enum")) return penum();
varflg = VARFLG_GLOBAL;
if (tok == TTNAM && zstreq(toknam, "extern")) {
tnext();
varflg = varflg | VARFLG_EXTERN;
} else if (tok == TTNAM && zstreq(toknam, "static")) {
tnext();
varflg = varflg | VARFLG_STATIC;
}
et = ptype(&pt);
tnext();
if (tok != TTNAM) fail("need name for decl or defn");
tnext();
if (tok == '[' || tok == ';') {
pvardecl(et, pt, varflg);
} else if (tok == '(') {
pfundecl();
} else fail("need (, [ or ; for decl");
}
int main(int argc, char **argv) {
int i;
ifname = argv[1];
ilineno = 1;
ifd = open(ifname, 0);
readc(ifd);
ofd = 1;
writez(ofd, "bits 32\n");
tpeek();
while (tok != TTEOF) {
ptoplvl();
tpeek();
}
writez(ofd, "section .bss\n");
i = 0;
while (i < nvar) {
if (varflgtab[i] & VARFLG_EXTERN) {
writez(ofd, "extern\t");
writez(ofd, varnamtab + i * MAXVARNAMSZ);
} else if (varflgtab[i] & VARFLG_GLOBAL) {
if (!(varflgtab[i] & VARFLG_STATIC)) {
writez(ofd, "global\t");
writez(ofd, varnamtab + i * MAXVARNAMSZ);
wnl();
}
writez(ofd, varnamtab + i * MAXVARNAMSZ);
writez(ofd, "\tresb ");
writei(ofd, getvarmemszi(i));
}
wnl();
i += 1;
}
close(ifd);
return 0;
/* close(ofd); */
}
@coinconclusive
Copy link
Author

Ninja file for building/testing

cflags = -m32 -std=c89 -g
nasmflags = -f elf32 -w-ptr
ldflags =

rule comp
  command = ./comp $in > $out

rule nasm
  command = nasm $in -o $out $nasmflags

rule gcc
  command = gcc $in -o $out $cflags

rule ld
  command = ld.lld $in -o $out $ldflags

rule run
  command = ./$in; echo "exit code: $$?"
  pool = console

build comp: gcc comp.c
build test.s: comp test.c | comp
build test.o: nasm test.s
build crt0.o: nasm crt0.s
build test: ld test.o crt0.o

default test

build run-test: run test

Testing file (test.c)

static int *ptr;
static int val;

int main() {
	ptr = &val;
	*ptr = 123;
	return *ptr;
}

Runtime file (crt0.s)

bits 32
global _start
_start:
extern main
	call main
	jmp _exit_linux_eax

global _linux_exit ; _Noreturn void _linux_exit(int code)
_linux_exit:
	mov eax, [esp]
_exit_linux_eax:
	mov ebx, eax
	mov eax, 1
	int 80h

global _linux_write ; int _linux_write(int fd, char *buf, int n)
_linux_write:
	mov ebx, eax
	mov eax, 1
	int 80h

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment