Last active
July 10, 2021 09:15
-
-
Save vurtun/5bbf587095371abf4021bf811ce28933 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <string.h> | |
/*
 * Scanner actions. Each lookup table below maps an input byte to one of
 * these values; bytes a table does not list are zero-initialised, i.e.
 * JFAILED. The enumerator order is significant because JFAILED must be 0.
 */
enum jstates {JFAILED, JNXT, JSEP, JUP, JDWN, JQUP, JQDWN, JESC, JUNESC, JBARE, JUNBARE, JUTF8_2,
    JUTF8_3, JUTF8_4, JUTF8_N, JSTATE_CNT};

/* Token kinds produced by jparse(). JEOS is 0 so it can end a token loop. */
enum jtype {JEOS,JERR,JTSEP,JOBJB,JOBJE,JARRB,JARRE,JNUM,JSTR,JTRUE,JFALSE,JNULL};

/* One token: its kind plus a (pointer, length) view of its text. */
struct jtok {int type, len; const char *str;};

/* Tokenizer state: active lookup table and the [cur, end) input window. */
struct jctx {const unsigned char *tbl; const char *cur, *end;};

/*
 * Top-level table: whitespace and ',' are skipped, ':' and '=' are
 * separators, brackets/braces open or close a container, '"' opens a
 * string, and a digit, '-', 't', 'f' or 'n' starts a bare token.
 * The digit range covers all of '0'..'9' (48..57).
 */
static const unsigned char jtbl[256] = {
    ['0' ... '9']=JBARE, ['\t']=JNXT, ['\r']=JNXT, ['\n']=JNXT, [' ']=JNXT,
    ['"']=JQUP, [':']=JSEP, ['=']=JSEP, [',']=JNXT, ['[']=JUP, [']']=JDWN,
    ['{']=JUP, ['}']=JDWN, ['-']=JBARE, ['t']=JBARE, ['f']=JBARE, ['n']=JBARE};

/*
 * Inside a bare token (number, true, false, null): printable ASCII keeps
 * the token going; ',', ']', '}' and any whitespace byte (space, tab, CR,
 * LF) end it.
 */
static const unsigned char jbare[256] = {
    [33 ... 43]=JNXT, [',']=JUNBARE, [45 ... 92]=JNXT, [']']=JUNBARE, [94 ... 124]=JNXT,
    ['}']=JUNBARE, [126]=JNXT, [' ']=JUNBARE, ['\t']=JUNBARE, ['\r']=JUNBARE, ['\n']=JUNBARE};

/*
 * Inside a quoted string: printable ASCII is accepted, '"' closes the
 * string, '\\' (byte 92) introduces an escape, and bytes 192..247 start a
 * 2-, 3- or 4-byte UTF-8 sequence. Everything else is rejected.
 */
static const unsigned char jstr[256] = {
    [32 ... 33]=JNXT, ['"']=JQDWN, [35 ... 91]=JNXT, ['\\']=JESC, [93 ... 126]=JNXT,
    [192 ... 223]=JUTF8_2, [224 ... 239]=JUTF8_3, [240 ... 247]=JUTF8_4};

/* Inside a multi-byte UTF-8 sequence: only continuation bytes are valid. */
static const unsigned char jutf8[256] = {[128 ... 191]=JUTF8_N};

/* Directly after an escape introducer: the accepted escape characters. */
static const unsigned char jesc[256] = { ['"']=JUNESC, ['\\']=JUNESC, ['/']=JUNESC,
    ['b']=JUNESC, ['f']=JUNESC, ['n']=JUNESC, ['r']=JUNESC, ['t']=JUNESC, ['u']=JUNESC};
static struct jtok | |
jparse(struct jctx *ctx) | |
{ | |
const char *str = 0; | |
int remain = 0, c = 0; | |
ctx->tbl = (ctx->tbl) ? ctx->tbl: jtbl; | |
#define t(...)(struct jtok){__VA_ARGS__} | |
while (ctx->cur < ctx->end) { | |
switch (ctx->tbl[c = (unsigned char)(*ctx->cur++)]) { | |
case JFAILED: return t(.type = JERR); | |
case JESC: ctx->tbl = jesc; break; | |
case JUNESC: ctx->tbl = jstr; break; | |
case JQUP: str = ctx->cur, ctx->tbl = jstr; break; | |
case JUTF8_2: ctx->tbl = jutf8, remain = 1; break; | |
case JUTF8_3: ctx->tbl = jutf8, remain = 2; break; | |
case JUTF8_4: ctx->tbl = jutf8, remain = 3; break; | |
case JUTF8_N: if (!--remain) ctx->tbl = jstr; break; | |
case JBARE: ctx->tbl=jbare, str = ctx->cur-1; break; | |
case JSEP: return t(.type=JTSEP, .str=ctx->cur-1, .len=1); | |
case JQDWN: ctx->tbl=jtbl; return t(.type = JSTR, .str=str, .len=(int)((ctx->cur-1)-str)); | |
case JUP: return t(.type=(c=='{')?JOBJB:JARRB, .str=ctx->cur-1, .len=1); | |
case JDWN: return t(.type=(c=='}')?JOBJE:JARRE, .str=ctx->cur-1, .len=1); | |
case JUNBARE: ctx->tbl=jtbl; return t(.str=str, .len=(int)(--ctx->cur - str), | |
.type=(str[0] == 't') ? JTRUE:(str[0] == 'f') ? JFALSE:(str[0] == 'n') ? JNULL:JNUM);} | |
} return t(.type=JEOS); | |
#undef t | |
} | |
int main(void) | |
{ | |
struct jtok t; | |
const char json[] = "{\"name\":\"test\", \"age\":42, \"utf8\":\"äöü\", \"alive\":true}"; | |
struct jctx ctx = {.cur = json, .end = json + sizeof(json)}; | |
while ((t = jparse(&ctx)).type) { | |
switch (t.type) {case JERR: goto err; | |
case JOBJB: printf("Token(Object_Begin)\n"); break; | |
case JOBJE: printf("Token(Object_End)\n"); break; | |
case JARRB: printf("Token(Array_Begin)\n"); break; | |
case JARRE: printf("Token(Array_End)\n"); break; | |
case JNUM: printf("Token(Number): %.*s\n", t.len, t.str); break; | |
case JSTR: printf("Token(String): %.*s\n", t.len, t.str); break; | |
case JTRUE: printf("Token(True): %.*s\n", t.len, t.str); break; | |
case JFALSE: printf("Token(False): %.*s\n", t.len, t.str); break; | |
case JNULL: printf("Token(NULL): %.*s\n", t.len, t.str); break; | |
case JTSEP: printf("Token(SEP): '%.*s'\n", t.len, t.str); break;} | |
} err: return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment