Created
June 22, 2023 13:50
-
-
Save coinconclusive/5df19b63009ac9b9b0f2ec68dda6035b to your computer and use it in GitHub Desktop.
WIP (planned to be) selfhosting c subset compiler
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <signal.h> /* raise, SIGTRAP (in fail, for debugging) */ | |
#include <stdio.h> /* fprintf, stderr (in _DBG, for debugging) */ | |
#include <unistd.h> /* write, read, _exit */ | |
#include <fcntl.h> /* open */ | |
#define _DBG(...) fprintf(stderr,__VA_ARGS__) | |
#define _DBGfn _DBG("%s:%d",__FILE__,__LINE__) | |
#define _TN ((char*[]){"EOF","NAM","INT"})[tok] | |
#define _pS(S) _DBG("%s\n",S) | |
#define _pD(S) _DBG("%d\n",S) | |
#define _pC(S) _DBG("'%c'\n",S) | |
#define _pTTn (_DBG("%s:",_TN),tok==1?_pS(toknam):tok==2?_pD(tokint):0) | |
#define _pTT {_DBGfn;_DBG(" tt: ");tok<32?tok<3?_pTTn:_pD(tok):_pC(tok);} | |
void zstrcp(char *dst, char *src) { | |
while (*src) { *dst = *src; dst += 1; src += 1; } | |
*dst = *src; | |
} | |
int zstreq(char *a, char *b) { | |
while (1) { | |
if (*a != *b) return 0; | |
if (*a == 0 || *b == 0) return *a == *b; | |
a += 1; b += 1; | |
} | |
} | |
int zstrsz(char *s) { | |
int i; | |
i = 0; | |
while (*s) { i += 1; s += 1; } | |
return i; | |
} | |
void writec(int fd, int c) { write(fd, &c, 1); } | |
void writez(int fd, char *s) { | |
write(fd, s, zstrsz(s)); | |
} | |
void writei(int fd, int n) { | |
if (n < 0) { writec(fd, '-'); return writei(fd, -n); } | |
if (n / 10) writei(fd, n / 10); | |
writec(fd, n % 10 + '0'); | |
} | |
char *ifname; | |
int ilineno, icolmn; | |
void fail(char *msg) { | |
writez(2, msg); | |
writez(2, "\nfail @ "); | |
writez(2, ifname); | |
writec(2, ':'); | |
writei(2, ilineno); | |
writec(2, ':'); | |
writei(2, icolmn); | |
writec(2, '\n'); | |
/* raise(SIGTRAP); */ | |
_exit(1); | |
} | |
int ifd; | |
int ofd; | |
enum { TTEOF, TTNAM, TTINT }; | |
enum { VARFLG_GLOBAL = 1, VARFLG_EXTERN = 2, VARFLG_STATIC = 4 }; | |
enum { PTRSZ = 4, INTSZ = 4 }; | |
enum { FEOF = -1 }; | |
int tokp; | |
enum { MAXTOKNAMSZ = 16 }; | |
int tok; | |
char toknam[16]; | |
int tokint; | |
int nstr; | |
enum { MAXSTRSZ = 32 }; | |
char strtab[8192]; /* 32x256. */ | |
int readcc; | |
int readc(int fd) { | |
if (read(fd, &readcc, 1) == 0) readcc = FEOF; | |
if (readcc == '\n') { ilineno += 1; icolmn = 0; } | |
icolmn += 1; | |
return readcc; | |
} | |
/* vars are of the form Type{sizeof=esz} *{n=ptr}{name}[{asz}] */ | |
int nvar; /* number of vars. */ | |
enum { MAXVARNAMSZ = 16 }; | |
char varnamtab[4096]; /* 16x256. zstr names. */ | |
char varptrtab[256]; /* level of pointer inderection. 0 for none. */ | |
char varesztab[256]; /* element size. */ | |
int varasztab[256]; /* array size. 0 for not an array. */ | |
int varofftab[256]; /* offset for locals, value for constants. */ | |
char varflgtab[256]; /* flags. */ | |
int getvarmemsz(int esz, int asz, int ptr) { | |
if (asz > 0) return getvarmemsz(esz, 0, ptr) * asz; | |
if (ptr > 0) return PTRSZ; | |
return esz; | |
} | |
int getvarmemszi(int i) { | |
return getvarmemsz(varesztab[i], varasztab[i], varptrtab[i]); | |
} | |
int getelmmemsz(int esz, int asz, int ptr) { | |
if (asz > 0) return getvarmemsz(esz, 0, ptr); | |
if (ptr > 0) return getvarmemsz(esz, 0, ptr - 1); | |
return esz; | |
} | |
int getvarsz(int esz, int asz, int ptr) { | |
if (asz > 0) return PTRSZ; | |
if (ptr > 0) return PTRSZ; | |
return esz; | |
} | |
int getvarszi(int i) { | |
return getvarsz(varesztab[i], varasztab[i], varptrtab[i]); | |
} | |
int getvarpmul(int esz, int asz, int ptr) { | |
if (asz > 0) return getvarsz(esz, 0, ptr); | |
if (ptr > 0) return getvarsz(esz, 0, ptr - 1); | |
return 1; | |
} | |
int getvarpmuli(int i) { | |
return getvarpmul(varesztab[i], varasztab[i], varptrtab[i]); | |
} | |
int findvar(char *name) { | |
int findidx; | |
findidx = 0; | |
while (findidx < nvar) { | |
if (zstreq(varnamtab + findidx * 16, name)) | |
return findidx; | |
findidx += 1; | |
} | |
writez(2, "no var '"); | |
writez(2, name); | |
writez(2, "'!\n"); | |
fail("no var"); | |
return -1; | |
} | |
int isid(int c) { | |
return c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); | |
} | |
int isws(int c) { return c == ' ' || c == '\n' || c == '\t' || c == '\r'; } | |
int tread(char *otoknam, int *otokint) { | |
int readcc1; | |
while (isws(readcc)) readc(ifd); | |
if (readcc == FEOF) return TTEOF; | |
while (readcc == '#') while (readcc != '\n' || readcc != FEOF) readc(ifd); | |
while (readcc == '/') { | |
readc(ifd); | |
if (readcc == '/') { | |
while (readcc != '\n' || readcc != FEOF) readc(ifd); | |
} else if (readcc == '*') { | |
readc(ifd); | |
while (1) { | |
if (readcc == '*') { readc(ifd); if (readcc == '/') break; } | |
if (readcc == FEOF) return TTEOF; | |
} | |
readc(ifd); | |
} else return '/'; | |
while (isws(readcc)) readc(ifd); | |
while (readcc == '#') while (readcc != '\n' || readcc != FEOF) readc(ifd); | |
if (readcc == FEOF) return TTEOF; | |
} | |
if (readcc >= '0' && readcc <= '9') { | |
*otokint = 0; | |
while (readcc >= '0' && readcc <= '9') { | |
*otokint = *otokint * 10 + readcc - '0'; | |
readc(ifd); | |
if (readcc == FEOF) break; | |
} | |
return TTINT; | |
} | |
if (isid(readcc)) { | |
*otoknam = 0; | |
while (isid(readcc)) { | |
*otoknam = readcc; | |
otoknam += 1; | |
*otoknam = 0; | |
readc(ifd); | |
if (readcc == FEOF) break; | |
} | |
return TTNAM; | |
} | |
readcc1 = readcc; | |
readc(ifd); | |
return readcc1; | |
} | |
void tpeek() { | |
if (tokp) return; | |
tokp = 1; | |
tok = tread(toknam, &tokint); | |
} | |
void tnext() { | |
if (tokp) { | |
tokp = 0; | |
return; | |
} | |
tok = tread(toknam, &tokint); | |
} | |
void wnl() { writec(ofd, '\n'); } | |
void wslab(int n) { writez(ofd, "_S"); writei(ofd, n); } | |
void wlab(int n) { writez(ofd, ".L"); writei(ofd, n); } | |
void wlabdefn() { writez(ofd, ":\n"); } | |
int nslab; | |
int wnslab() { wslab(nslab); wlabdefn(); nslab += 1; return nslab - 1; } | |
int nlab; | |
int wnlab() { wslab(nlab); wlabdefn(); nlab += 1; return nlab - 1; } | |
void wjmpl(int l) { writez(ofd, "\tjmp "); wlab(l); wnl(); } | |
void wab() { writez(ofd, " eax,ecx"); } | |
void wmov() { writez(ofd, "\tmov "); } | |
void wmova() { wmov(); writez(ofd, "eax,"); } | |
void wpusha() { writez(ofd, "\tpush eax\n"); } | |
void wpopa() { writez(ofd, "\tpop eax\n"); } | |
void wpushc() { writez(ofd, "\tpush ecx\n"); } | |
void wpopc() { writez(ofd, "\tpop ecx\n"); } | |
void wmovca() { wmov(); writez(ofd, "ecx,eax\n"); } | |
void wmovba() { wmov(); writez(ofd, "ebx,eax\n"); } | |
void wderefsz(int sz) { | |
if (sz == 4) writez(ofd, "dword"); | |
if (sz == 2) writez(ofd, "word"); | |
if (sz == 1) writez(ofd, "byte"); | |
} | |
void wsza(int sz) { | |
if (sz == 4) writez(ofd, "eax"); | |
if (sz == 2) writez(ofd, "ax"); | |
if (sz == 1) writez(ofd, "al"); | |
} | |
void wderefn(int sz, char *nam) { | |
wderefsz(sz); | |
writec(ofd, '['); | |
writez(ofd, nam); | |
writec(ofd, ']'); | |
} | |
void wderefo(int sz, int off) { | |
if (off == 0) return wderefn(sz, "ebp"); | |
wderefsz(sz); | |
writez(ofd, "[ebp"); | |
if (off > 0) writec(ofd, '+'); | |
writei(ofd, off); | |
writec(ofd, ']'); | |
} | |
int pexp(int *ptr); | |
/* toknam - function name. tok(peek) == '(' */ | |
int pfcall(int *ptr) { | |
return INTSZ; | |
} | |
/* pprimary.varset [private] */ | |
int pprimaryvarset(int i, int esz, int *ptr) { | |
char nam[16]; | |
zstrcp(nam, toknam); | |
tnext(); | |
pexp(ptr); | |
wmov(); | |
if (varflgtab[i] & VARFLG_GLOBAL) { | |
if (varasztab[i] > 0) fail("cannot assign arrays"); | |
wderefn(esz, nam); | |
} else { | |
wderefo(esz, varofftab[i]); | |
} | |
writec(ofd, ','); | |
wsza(esz); | |
wnl(); | |
return esz; | |
} | |
int pprimary(int *ptr, int lval) { | |
int i; int esz; | |
tnext(); | |
if (tok == TTINT) { | |
if (tokint != 0) { | |
wmova(); | |
writei(ofd, tokint); | |
} else { | |
writez(ofd, "\txor eax,eax"); | |
} | |
wnl(); | |
*ptr = 0; | |
return 1; | |
} | |
if (tok == TTNAM) { | |
i = findvar(toknam); | |
esz = getvarszi(i); | |
tpeek(); | |
if (tok == '(') return pfcall(ptr); | |
_pTT; | |
if (lval && tok == '=') return pprimaryvarset(i, esz, ptr); | |
wmov(); | |
wsza(esz); | |
writec(ofd, ','); | |
if (varflgtab[i] & VARFLG_GLOBAL) { | |
if (varasztab[i] > 0) writez(ofd, toknam); | |
else wderefn(esz, toknam); | |
} else { | |
wderefo(esz, varofftab[i]); | |
} | |
wnl(); | |
*ptr = varptrtab[i]; | |
return getvarpmuli(i); | |
} | |
if (tok == '(') { | |
esz = pexp(ptr); | |
tnext(); | |
if (tok != ')') fail("need )"); | |
return esz; | |
} | |
fail("need name, int or ( for atom"); | |
return 0; | |
} | |
int plvalprimary(int *ptr) { | |
int i; int esz; | |
tnext(); | |
if (tok == TTNAM) { | |
i = findvar(toknam); | |
esz = getvarszi(i); | |
if (varflgtab[i] & VARFLG_GLOBAL) { | |
if (varasztab[i] > 0) fail("arrays arent lvalues"); | |
wmova(); writez(ofd, toknam); | |
} else { | |
writez(ofd, "\tlea eax,"); | |
wderefo(esz, varofftab[i]); | |
} | |
wnl(); | |
*ptr = varptrtab[i]; | |
return getvarpmuli(i); | |
} | |
if (tok == '(') { | |
esz = plvalprimary(ptr); | |
tnext(); | |
if (tok != ')') fail("need )"); | |
return esz; | |
} | |
fail("need name or ( for lvalue"); | |
return esz; | |
} | |
void wptraddac(int esz) { | |
writez(ofd, "\tmul ecx,"); writei(ofd, esz); wnl(); | |
writez(ofd, "\tadd eax,ecx"); wnl(); | |
} | |
int plvalpostfix(int *ptr) { | |
int esz; int ptr2; | |
esz = plvalprimary(ptr); | |
tpeek(); | |
if (tok == '[') { | |
tnext(); | |
wpusha(); | |
pexp(&ptr2); | |
if (ptr2 > 0) fail("cant index with ptr"); | |
tnext(); | |
if (tok != ']') fail("need ]"); | |
wmovca(); | |
wpopa(); | |
wptraddac(esz); | |
/* todo: multiple postfix []? */ | |
} | |
return esz; | |
} | |
int ppostfix(int *ptr, int lval) { | |
int esz; | |
esz = pprimary(ptr, lval); | |
return esz; | |
} | |
int punary(int *ptr, int lval) { | |
int esz; int ptr2; | |
tpeek(); | |
if (tok == '&') { | |
tnext(); | |
plvalpostfix(ptr); | |
*ptr = *ptr + 1; | |
/*wderefn(4, "eax");*/ | |
return esz; | |
} | |
if (tok == '*') { | |
tnext(); | |
esz = punary(ptr, 0); | |
if (*ptr < 1) fail("cant deref a non-ptr"); | |
tpeek(); | |
if (tok == '=') { | |
tnext(); | |
wmovca(); | |
wpushc(); | |
pexp(&ptr2); | |
if (ptr2 != *ptr - 1) { | |
writei(2, *ptr); writez(2, " != "); | |
writei(2, ptr2); | |
fail(" - ptr num assign mismatch"); | |
} | |
wpopc(); | |
wmov(); wderefn(4, "ecx"); writec(ofd, ','); wsza(esz); wnl(); | |
} else { | |
wmova(); | |
wderefn(4, "eax"); | |
wnl(); | |
} | |
*ptr = *ptr - 1; | |
return esz; | |
} | |
return ppostfix(ptr, lval); | |
} | |
int pmul(int *ptr, int lval) { | |
int o; int esz; | |
esz = punary(ptr, lval); | |
tpeek(); | |
while (tok == '*' || tok == '/' || tok == '%') { | |
if (esz > 1) fail("cant mul/div/mod ptr"); | |
tnext(); | |
o = tok; | |
wpusha(); | |
if (punary(ptr, 0) > 1) fail("cant mul/div/mod by ptr"); | |
wmovca(); | |
wpopa(); | |
if (o == '*') { | |
writez(ofd, "\timul"); | |
wab(); | |
} else { | |
writez(ofd, "\tcdq\n"); | |
writez(ofd, "\tidiv ecx"); | |
if (o == '%') writez(ofd, "\n\tmov eax,edx"); | |
} | |
wnl(); | |
} | |
return esz; | |
} | |
int padd(int *ptr, int lval) { | |
int o; int esz; | |
esz = pmul(ptr, lval); | |
tpeek(); | |
while (tok == '+' || tok == '-') { | |
tnext(); | |
o = tok; | |
wpusha(); | |
if (pmul(ptr, 0) > 1) fail("cant add/sub two ptrs"); | |
if (esz > 1) { | |
writez(ofd, "mul ecx,"); | |
writei(ofd, esz); | |
wnl(); | |
} | |
wmovca(); | |
wpopa(); | |
if (o == '+') writez(ofd, "\tadd"); | |
if (o == '-') writez(ofd, "\tsub"); | |
wab(); | |
wnl(); | |
} | |
return esz; | |
} | |
void pcmpeq(int *ptr, int lval) { | |
int o; | |
pmul(ptr, lval); | |
tpeek(); | |
while (tok == '>' || tok == '<') { | |
tnext(); | |
o = tok; | |
wpusha(); | |
pmul(ptr, 0); | |
wmovca(); | |
wpopa(); | |
if (o == '+') writez(ofd, "\tadd"); | |
if (o == '-') writez(ofd, "\tsub"); | |
wab(); | |
wnl(); | |
} | |
} | |
int pexp(int *ptr) { | |
return padd(ptr, 1); | |
} | |
void pfunc() { | |
} | |
void penum() { | |
char enam[16]; | |
int val; | |
val = 0; | |
tnext(); | |
tnext(); | |
if (tok != TTNAM) fail("need name for enum"); | |
zstrcp(enam, toknam); | |
tpeek(); if (tok != '{') fail("need { for enum"); | |
do { | |
tnext(); | |
if (tok != TTNAM) fail("need name for enum"); | |
tnext(); | |
if (tok == '=') { | |
tnext(); | |
if (tok != TTINT) fail("need value for enum item"); | |
val = tokint; | |
} | |
} while(tok == ','); | |
tnext(); if (tok != '}') fail("need } for enum"); | |
tnext(); if (tok != ';') fail("need ; for enum"); | |
} | |
int istypnam() { | |
if (zstreq(toknam, "int")) return INTSZ; | |
else if (zstreq(toknam, "char")) return 1; | |
return 0; | |
} | |
int ptype(int *ptr) { | |
int esz; | |
*ptr = 0; | |
tnext(); | |
if ((esz = istypnam()) == 0) { | |
writez(2, toknam); | |
fail(": bad typename"); | |
} | |
tpeek(); | |
while (tok == '*') { | |
tnext(); | |
*ptr += 1; | |
tpeek(); | |
} | |
return esz; | |
} | |
/* needs toknam - variable name and tok == '[' || tok == ';' */ | |
void pvardecl(int esz, int ptr, int flg) { | |
int asz; | |
asz = 0; | |
if (tok == '[') { | |
tnext(); | |
if (tok != TTINT) fail("need array size for var decl"); | |
asz = tokint; | |
tnext(); | |
if (tok != ']') fail("need ] for var decl"); | |
tnext(); | |
} | |
varasztab[nvar] = asz; | |
varesztab[nvar] = esz; | |
varptrtab[nvar] = ptr; | |
varofftab[nvar] = 0; | |
varflgtab[nvar] = flg; | |
zstrcp(varnamtab + nvar * MAXVARNAMSZ, toknam); | |
nvar += 1; | |
if (tok != ';') fail("need ; for var decl"); | |
} | |
void pstmt() { | |
int dummy; | |
tpeek(); if (tok == ';') return tnext(); | |
if (tok == TTNAM) { | |
if (zstreq(toknam, "return")) { | |
tnext(); tpeek(); | |
if (tok != ';') { | |
pexp(&dummy); | |
tpeek(); | |
if (tok != ';') fail("need ; for return"); | |
} | |
tnext(); | |
wjmpl(0); | |
return; | |
} | |
} | |
pexp(&dummy); | |
/* _pTT fail("need return for stmt"); */ | |
} | |
/* needs toknam - variable name and tok == '(' */ | |
void pfundecl() { | |
int lastnvar; | |
int argptr; int argesz; int argoff; | |
int localoff; | |
argoff = 0; | |
writez(ofd, "global "); writez(ofd, toknam); wnl(); | |
writez(ofd, toknam); wlabdefn(); | |
writez(ofd, "\tpush ebp\n"); | |
writez(ofd, "\tmov ebp,esp\n"); | |
lastnvar = nvar; | |
nlab = 1; | |
tpeek(); | |
if (tok != ')') { | |
do { | |
argesz = ptype(&argptr); | |
tnext(); | |
if (tok != TTNAM) fail("need name for arg"); | |
varasztab[nvar] = 0; | |
varesztab[nvar] = argesz; | |
varptrtab[nvar] = argptr; | |
varofftab[nvar] = argoff; | |
zstrcp(varnamtab + nvar * MAXVARNAMSZ, toknam); | |
argoff += getvarszi(nvar); | |
tnext(); | |
} while (tok == ','); | |
if (tok != ')') fail("need ) for arg list in fdecl"); | |
} else { | |
tnext(); | |
} | |
tpeek(); | |
if (tok == ';') { | |
nvar = lastnvar; | |
return; | |
} | |
if (tok != '{') fail("need { or ; for fdecl"); | |
tnext(); | |
while (tok != '}' && tok != TTEOF) { | |
pstmt(); | |
tpeek(); | |
} | |
tnext(); | |
if (tok != '}') fail("need } for fdefn"); | |
wlab(0); wlabdefn(); | |
writez(ofd, "\tleave\n\tret\n"); | |
nvar = lastnvar; | |
} | |
/* toplevel */ | |
void ptoplvl() { | |
int et; int pt; int varflg; | |
tpeek(); | |
if (tok == TTEOF) return; | |
if (tok == TTNAM && zstreq(toknam, "enum")) return penum(); | |
varflg = VARFLG_GLOBAL; | |
if (tok == TTNAM && zstreq(toknam, "extern")) { | |
tnext(); | |
varflg = varflg | VARFLG_EXTERN; | |
} else if (tok == TTNAM && zstreq(toknam, "static")) { | |
tnext(); | |
varflg = varflg | VARFLG_STATIC; | |
} | |
et = ptype(&pt); | |
tnext(); | |
if (tok != TTNAM) fail("need name for decl or defn"); | |
tnext(); | |
if (tok == '[' || tok == ';') { | |
pvardecl(et, pt, varflg); | |
} else if (tok == '(') { | |
pfundecl(); | |
} else fail("need (, [ or ; for decl"); | |
} | |
int main(int argc, char **argv) { | |
int i; | |
ifname = argv[1]; | |
ilineno = 1; | |
ifd = open(ifname, 0); | |
readc(ifd); | |
ofd = 1; | |
writez(ofd, "bits 32\n"); | |
tpeek(); | |
while (tok != TTEOF) { | |
ptoplvl(); | |
tpeek(); | |
} | |
writez(ofd, "section .bss\n"); | |
i = 0; | |
while (i < nvar) { | |
if (varflgtab[i] & VARFLG_EXTERN) { | |
writez(ofd, "extern\t"); | |
writez(ofd, varnamtab + i * MAXVARNAMSZ); | |
} else if (varflgtab[i] & VARFLG_GLOBAL) { | |
if (!(varflgtab[i] & VARFLG_STATIC)) { | |
writez(ofd, "global\t"); | |
writez(ofd, varnamtab + i * MAXVARNAMSZ); | |
wnl(); | |
} | |
writez(ofd, varnamtab + i * MAXVARNAMSZ); | |
writez(ofd, "\tresb "); | |
writei(ofd, getvarmemszi(i)); | |
} | |
wnl(); | |
i += 1; | |
} | |
close(ifd); | |
return 0; | |
/* close(ofd); */ | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ninja file for building/testing
Testing file (
test.c
)Runtime file (
crt0.s
)