Last active
January 13, 2016 10:00
-
-
Save ecnelises/50b318a44fc743b2182b to your computer and use it in GitHub Desktop.
支持基本正则运算的正则引擎
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* regex.c | |
* Qiu Chaofan, 2016/1/12 | |
* | |
* Regular expression engine. | |
* | |
* Features: | |
* Support matching for | * ? + . () | |
* No error handling is considered yet. | |
*/ | |
#include <stdio.h> | |
#include <string.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#define STATE_ANYCHAR -7 /* . */ | |
#define STATE_OR -2 /* | */ | |
#define STATE_QMARK -3 /* ? */ | |
#define STATE_ASTERISK -4 /* * */ | |
#define STATE_PLUS -5 /* + */ | |
#define STATE_JOIN -6 /* ( */ | |
/* | |
* Use of STATE_JOIN is for tackling parentheses. | |
* For example, if we parse '(a(b|c)d)', it should be like this: | |
* a | |
* | | |
* --- | |
* | | | |
* JOIN NULL | |
* | | | |
* d OR | |
* || | |
* bc | |
*/ | |
struct state { | |
int c; | |
struct state *lc; | |
struct state *rc; | |
}; | |
typedef struct state * qregex_t; | |
qregex_t qregex_start(const char *pattern); | |
void qregex_end(qregex_t item); | |
void qregex_match_print(const char *text, qregex_t pattern); | |
struct state *parse_regex(const char *pattern, const char *end); | |
const char *match_from_tree_end(const char *text, struct state *pattern_tree); | |
qregex_t qregex_start(const char *pattern) | |
{ | |
return parse_regex(pattern, pattern + strlen(pattern)); | |
} | |
void qregex_end(qregex_t item) | |
{ | |
if (item == NULL) { | |
return; | |
} | |
qregex_end(item->lc); | |
qregex_end(item->rc); | |
free(item); | |
} | |
void qregex_match_print(const char *text, qregex_t pattern) | |
{ | |
if (pattern == NULL) { | |
puts(text); | |
} | |
const char *tmp = NULL; | |
while (*text != '\0') { | |
if ((tmp = match_from_tree_end(text, pattern)) == NULL) { | |
++text; | |
} else { | |
while (text != tmp) { | |
putchar(*text++); | |
} | |
putchar('\n'); | |
} | |
} | |
} | |
/* | |
* Generate a regex tree from a regex string. | |
*/ | |
struct state *parse_regex(const char *pattern, const char *end) | |
{ | |
if (pattern == end) { | |
return NULL; | |
} | |
bool once_out = false; | |
int paren = 0; | |
struct state *current = NULL, *tmp = NULL; | |
const char *or_pos = NULL; | |
if (*pattern != '(') { | |
once_out = true; | |
} | |
/* Find whether there's an 'or' sign. */ | |
for (const char *i = pattern; i < end; ++i) { | |
if (*i == '(') { | |
++paren; | |
} else if (*i == ')') { | |
--paren; | |
} else if (*i == '|' && paren == 0) { | |
or_pos = i; | |
once_out = true; | |
break; | |
} | |
if (paren == 0 && i < end - 1) { | |
once_out = true; | |
} | |
} | |
if (or_pos != NULL) { | |
current = malloc(sizeof(struct state)); | |
current->c = STATE_OR; | |
current->lc = parse_regex(pattern, or_pos); | |
current->rc = parse_regex(++or_pos, end); | |
return current; | |
} else if (!once_out) { | |
return parse_regex(++pattern, --end); | |
} else { | |
if (*pattern == '(') { | |
const char *find_rparen = pattern; | |
paren = 0; | |
for (find_rparen = pattern; find_rparen < end; ++find_rparen) { | |
if (*find_rparen == '(') { | |
++paren; | |
} else if (*find_rparen == ')') { | |
if (--paren == 0) { | |
break; | |
} | |
} | |
} | |
tmp = parse_regex(++pattern, find_rparen); | |
current = malloc(sizeof(struct state)); | |
current->c = STATE_JOIN; | |
current->rc = tmp; | |
current->lc = NULL; | |
pattern = ++find_rparen; | |
} else { | |
current = malloc(sizeof(struct state)); | |
if (*pattern == '.') { | |
current->c = STATE_ANYCHAR; | |
++pattern; | |
} else { | |
current->c = *pattern++; | |
} | |
current->rc = NULL; | |
} | |
if (*pattern != '\0' && strchr("+*?", *pattern) != NULL) { | |
tmp = current; | |
tmp->lc = NULL; | |
current = malloc(sizeof(struct state)); | |
if (*pattern == '+') { | |
current->c = STATE_PLUS; | |
} else if (*pattern == '*') { | |
current->c = STATE_ASTERISK; | |
} else if (*pattern == '?') { | |
current->c = STATE_QMARK; | |
} | |
current->lc = tmp; | |
current->rc = parse_regex(++pattern, end); | |
} else { | |
current->lc = parse_regex(pattern, end); | |
} | |
} | |
return current; | |
} | |
/* | |
* Return the end of matching. | |
* If matching failed, return null. | |
*/ | |
const char *match_from_tree_end(const char *text, struct state *pattern_tree) | |
{ | |
if (pattern_tree == NULL) { | |
return text; | |
} | |
const char *res = NULL; | |
switch (pattern_tree->c) { | |
case STATE_OR: | |
if ((res = match_from_tree_end(text, pattern_tree->lc)) != NULL) { | |
return res; | |
} else { | |
return match_from_tree_end(text, pattern_tree->rc); | |
} | |
case STATE_QMARK: | |
res = text; | |
if ((res = match_from_tree_end(res, pattern_tree->lc)) != NULL) { | |
text = res; | |
} | |
return match_from_tree_end(text, pattern_tree->rc); | |
case STATE_PLUS: | |
if ((text = match_from_tree_end(text, pattern_tree->lc)) == NULL) { | |
return NULL; | |
} | |
/* fall through */ | |
case STATE_ASTERISK: | |
res = text; | |
while ((res = match_from_tree_end(res, pattern_tree->lc)) != NULL) { | |
text = res; | |
} | |
return match_from_tree_end(text, pattern_tree->rc); | |
case STATE_JOIN: | |
if ((text = match_from_tree_end(text, pattern_tree->rc)) == NULL) { | |
return NULL; | |
} | |
return match_from_tree_end(text, pattern_tree->lc); | |
case STATE_ANYCHAR: | |
if (*text != '\0') { | |
return match_from_tree_end(++text, pattern_tree->lc); | |
} else { | |
return NULL; | |
} | |
default: | |
if (*text == pattern_tree->c) { | |
return match_from_tree_end(++text, pattern_tree->lc); | |
} else { | |
return NULL; | |
} | |
} | |
} | |
int main(int argc, char *argv[]) | |
{ | |
qregex_t s = qregex_start("colo.r"); | |
qregex_match_print("color twinkle colour cowl colou colorr", s); | |
/* | |
* If it runs well, it will print: | |
* | |
* colour | |
* colorr | |
* | |
* into the screen. | |
*/ | |
qregex_end(s); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment