Last active
September 28, 2022 18:17
-
-
Save tinkertim/893e4a16d610d3e2bb3c60f2778f694f to your computer and use it in GitHub Desktop.
Grep + Awk in super small memory footprint form factor for use as a log saw on embedded systems.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Copyright (c) 2008, Tim Post <tinkertim@gmail.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the original program's authors nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/* Some example usages:
 * grawk shutdown '$5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15' messages
 * grawk shutdown '$5, $6, $7, $8, $9, $10, " -- " $1, $2, $3' messages
 * grawk dhclient '$1, $2 " \"$$\"-- " $3' syslog
 * cat syslog | grawk dhclient '$0'
 * cat myservice.log | grawk -F , error '$3'
 *
 * Contributors:
 * Tim Post, Nicholas Clements, Alex Karlov
 * We hope that you find this useful! */
/* FIXME:
 * readline() should probably be renamed
 */
/* TODO:
 * Add a tail -f like behavior that applies expressions and fields
 * Recursive (like grep -r) or at least honor symlinks ? */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <getopt.h>
#include <regex.h>
#include <sys/types.h>
#include <sys/stat.h>
/* Release version, printed by usage() and by the version option */
#define VERSION "1.0.7"
/* Contact printed on the "Report bugs to" line of the usage text */
#define MAINTAINER "Tim Post <echo@echoreply.us>"
/* Compiled form of an awk-style output pattern, as filled in by awkcomp() */
typedef struct awk_pattern
{
    int maxfield;   /* Highest field number used by any $# field */
    int numfields;  /* Number of entries stored in fields */
    char **fields;  /* The pattern pieces, in the order they are printed */
} awk_pat_t;
/* Long options understood by getopt_long(). Every entry has a NULL flag
 * pointer, so getopt_long() returns the character in the last column and
 * main() handles it in the same switch as the short option. */
static struct option const long_options[] = {
    { "ignore-case",     no_argument,       NULL, 'i' },
    { "with-filename",   no_argument,       NULL, 'W' },
    { "no-filename",     no_argument,       NULL, 'w' },
    { "line-number",     no_argument,       NULL, 'n' },
    { "field-separator", required_argument, NULL, 'F' },
    { "help",            no_argument,       NULL, 'h' },
    { "version",         no_argument,       NULL, 'v' },
    { NULL, 0, NULL, 0 }
};
/* The official name of the program, as printed in the usage text */
const char *progname = "grawk";
/* Delimiter set handed to strtok() when a line is split into fields.
 * Allocated, optionally replaced (-F) and freed by main(). */
char *tokdelim = NULL;
/* Prototypes */ | |
static void usage(void); | |
static int process(FILE *, regex_t, awk_pat_t, char *, int); | |
static int process_line(char *, awk_pat_t, char *, char *); | |
static int process_files(int, char **, regex_t, awk_pat_t, int, int); | |
static int process_pipe(regex_t, awk_pat_t, int); | |
static int awkcomp(awk_pat_t *, char *); | |
static void awkfree(awk_pat_t *); | |
static char *readline(FILE *); | |
static void usage(void) | |
{ | |
printf("%s %s\n", progname, VERSION); | |
printf("Usage: %s [OPTION] PATTERN OUTPUT_PATTERN file1 [file2]...\n", | |
progname); | |
printf("Options:\n"); | |
printf(" --help " | |
"show help and examples\n"); | |
printf(" -i, --ignore-case " | |
"ignore case distinctions\n"); | |
printf(" -W, --with-filename " | |
"Print filename for each match\n"); | |
printf(" -w, --no-filename " | |
"Never print filename for each match\n"); | |
printf(" -n, --line-number " | |
"Prefix each line of output with line number.\n"); | |
printf(" -F fs, --field-separator=fs " | |
"Use fs as the field separator\n"); | |
printf(" -h, --help " | |
"Print a brief help summary\n"); | |
printf(" -v, --version " | |
"Print version information and exit normally\n"); | |
printf(" PATTERN " | |
"a basic regular expression\n"); | |
printf(" OUTPUT_PATTERN " | |
"awk-style print statement; defines " | |
"output fields\n"); | |
printf("\nExamples:\n"); | |
printf(" Retrieve joe123's home directory from /etc/passwd:\n"); | |
printf("\t%s -F : \"joe123\" '$6' /etc/passwd\n", progname); | |
printf("\n Find fields 2 3 and 4 on lines that begin with @ from stdin:\n"); | |
printf("\tcat file.txt | %s \"^@\" '$2,$3,$4'\n", progname); | |
printf("\n Use as a simple grep:\n"); | |
printf("\t%s \"string to find\" '$0' /file.txt\n", progname); | |
printf("\nReport bugs to %s\n", MAINTAINER); | |
} | |
/* readline() - read one line from the file handle.
 *
 * A line ends at '\n', at '\r', or at "\r\n" (counted as one terminator).
 * The terminator is not stored.
 *
 * Returns a NUL-terminated, malloc()ed string that the caller must free(),
 * or NULL when there is nothing left to read (end of input or read error
 * before any character was read) or when memory cannot be allocated.
 * The stream is never closed here; it stays owned by the caller.
 *
 * When a line is returned, the end-of-file indicator is cleared so that a
 * caller testing feof() right after the call does not mistake a final line
 * without a trailing newline for "no more data". The following call reaches
 * end of file again and returns NULL. */
static char *readline(FILE *fp)
{
    const size_t step = 256;
    size_t cap = 256, len = 0;
    int ch, terminated = 0;
    char *str = malloc(cap);

    if (str == NULL)
        return NULL;

    while ((ch = fgetc(fp)) != EOF) {
        if (ch == '\n' || ch == '\r') {
            if (ch == '\r') {
                /* Swallow the '\n' of a CRLF pair; put anything else back. */
                int next = fgetc(fp);

                if (next != '\n' && next != EOF)
                    ungetc(next, fp);
            }
            terminated = 1;
            break;
        }
        /* Keep one byte in reserve for the terminating NUL. */
        if (len + 1 >= cap) {
            char *grown = realloc(str, cap + step);

            if (grown == NULL) {
                free(str);
                return NULL;
            }
            str = grown;
            cap += step;
        }
        str[len++] = (char)ch;
    }

    /* Nothing read and no terminator seen: end of input (or read error). */
    if (!terminated && len == 0) {
        free(str);
        return NULL;
    }

    str[len] = '\0';
    if (feof(fp))
        clearerr(fp);
    return str;
}
/* process() - this is the actual processing where we compare against a | |
* previously compiled grep pattern and output based on the awk pattern. | |
* The file is opened by the calling function. We pass in an empty string | |
* if we don't want to show the filename. If we want to show the line number, | |
* the value of show_lineno is 1. If we find a line, return 1. If no line is | |
* found, return 0. If an error occurs, return -1. */ | |
static int process(FILE *fp, regex_t re, awk_pat_t awk, | |
char *filename, int show_lineno) | |
{ | |
char *inbuf = NULL; | |
char slineno[32]; | |
memset(slineno, 0, sizeof(slineno)); | |
long lineno = 0; | |
int found = 0; | |
while (1) { | |
inbuf = readline(fp); | |
if (!inbuf) | |
break; | |
if (feof(fp)) | |
break; | |
lineno++; | |
if (regexec(&re, inbuf, (size_t)0, NULL, 0) == 0) { | |
found = 1; // Found a line. | |
if (show_lineno) | |
sprintf(slineno, "%ld:", lineno); | |
if (process_line(inbuf, awk, filename, slineno)) { | |
fprintf (stderr, "Error processing line [%s]\n", inbuf); | |
free (inbuf); | |
return -1; | |
} | |
} | |
free (inbuf); | |
} | |
if (inbuf) | |
free(inbuf); | |
return found; | |
} | |
/* process_files() - process one or more files from the command-line. | |
* If at least one line is found, return 1, else return 0 if no lines | |
* were found or an error occurs. */ | |
static int process_files(int numfiles, char **files, regex_t re, awk_pat_t awk, | |
int show_filename, int show_lineno) | |
{ | |
int i, found = 0; | |
FILE *fp = NULL; | |
struct stat fstat; | |
char filename[1024]; | |
memset(filename, 0, sizeof(filename)); | |
for(i = 0; i < numfiles; i++) { | |
if (stat(files[i], &fstat) == -1) { | |
/* Did a file get deleted from the time we started running? */ | |
fprintf (stderr, | |
"Error accessing file %s. No such file\n", files[i]); | |
continue; | |
} | |
if (show_filename) | |
sprintf( filename, "%s:", files[i] ); | |
/* For now, we aren't recursive. Perhaps allow symlinks? */ | |
if ((fstat.st_mode & S_IFMT) != S_IFREG) | |
continue; | |
if (NULL == (fp = fopen(files[i], "r"))) { | |
fprintf(stderr, | |
"Error opening file %s. Permission denied\n", files[i]); | |
continue; | |
} | |
if (process(fp, re, awk, filename, show_lineno) == 1) | |
found = 1; | |
fclose(fp); | |
} | |
return found; | |
} | |
/* process_pipe() - process input from stdin */ | |
static int process_pipe(regex_t re, awk_pat_t awk, int show_lineno) | |
{ | |
if (process(stdin, re, awk, "", show_lineno) == 1) | |
return 1; | |
return 0; | |
} | |
/* process_line() - process the line based on the awk-style pattern and output | |
* the results. */ | |
static int process_line(char *inbuf, awk_pat_t awk, char *filename, char *lineno) | |
{ | |
char full_line[3] = { '\1', '0', '\0' }; | |
if (awk.numfields == 1 && strcmp(awk.fields[0], full_line) == 0) { | |
/* If the caller only wants the whole string, oblige, quickly. */ | |
fprintf (stdout, "%s%s%s\n", filename, lineno, inbuf); | |
return 0; | |
} | |
/* Build an array of fields from the line using strtok() | |
* TODO: make this re-entrant so that grawk can be spawned as a thread */ | |
char **linefields = (char **)malloc((awk.maxfield + 1) * sizeof(char *)); | |
char *wrkbuf = strdup(inbuf), *tbuf; | |
int count = 0, n = 1, i; | |
for (i = 0; i < (awk.maxfield + 1); i++) { | |
linefields[i] = NULL; | |
} | |
tbuf = strtok(wrkbuf, tokdelim); | |
if(tbuf) | |
linefields[0] = strdup(tbuf); | |
while (tbuf != NULL) { | |
tbuf = strtok(NULL, tokdelim); | |
if (!tbuf) | |
break; | |
count++; | |
if (count > awk.maxfield) | |
break; | |
linefields[count] = strdup(tbuf); | |
if (!linefields[count]) { | |
fprintf(stderr, "Could not allocate memory to process file %s\n", | |
filename); | |
return -1; | |
} | |
} | |
/* For each field in the awk structure, | |
* find the field and print it to stdout.*/ | |
fprintf(stdout, "%s%s", filename, lineno); /* if needed */ | |
for (i = 0; i < awk.numfields; i++) { | |
if (awk.fields[i][0] == '\1') { | |
n = atoi(&awk.fields[i][1]); | |
if (n == 0) { | |
fprintf(stdout, "%s", inbuf); | |
continue; | |
} | |
if (linefields[n-1]) | |
fprintf(stdout, "%s", linefields[n-1]); | |
continue; | |
} else | |
fprintf(stdout, "%s", awk.fields[i]); | |
} | |
fprintf(stdout, "\n"); | |
/* Cleanup */ | |
if (wrkbuf) | |
free(wrkbuf); | |
for (i = 0; i < count; i++) { | |
free(linefields[i]); | |
linefields[i] = (char *) NULL; | |
} | |
free(linefields); | |
linefields = (char **)NULL; | |
return 0; | |
} | |
/* awkcomp() - little awk-style print format compilation routine. | |
* Returns structure with the apattern broken down into an array for easier | |
* comparison and printing. Handles string literals as well as fields and | |
* delimiters. Example: $1,$2 " \$ and \"blah\" " $4 | |
* Returns -1 on error, else 0. */ | |
static int awkcomp(awk_pat_t *awk, char *apattern) | |
{ | |
awk->maxfield = 0; | |
awk->numfields = 0; | |
awk->fields = NULL; | |
awk->fields = (char **)malloc(sizeof(char *)); | |
int i, num = 0; | |
char *wrkbuf; | |
wrkbuf = (char *)malloc(strlen(apattern) + 1); | |
if (wrkbuf == NULL) { | |
free(awk); | |
fprintf(stderr, "Memory allocation error (wrkbuf) in awkcomp()\n"); | |
return -1; | |
} | |
int inString = 0, offs = 0; | |
char ch; | |
for (i = 0; i < strlen( apattern ); i++) { | |
ch = apattern[i]; | |
if (inString && ch != '"' && ch != '\\') { | |
wrkbuf[offs++] = ch; | |
continue; | |
} | |
if (ch == ' ') | |
continue; | |
switch (ch) { | |
/* Handle delimited strings inside of literal strings */ | |
case '\\': | |
if (inString) { | |
wrkbuf[offs++] = apattern[++i]; | |
continue; | |
} else { | |
/* Unexpected and unconventional escape (can get these | |
* from improper invocations of sed in a pipe with grawk), | |
* if sed is used to build the field delimiters */ | |
fprintf(stderr, | |
"Unexpected character \'\\\' in output format\n"); | |
return -1; | |
} | |
break; | |
/* Beginning or ending of a literal string */ | |
case '"': | |
inString = !inString; | |
if (inString) | |
continue; | |
break; | |
/* Handle the awk-like $# field variables */ | |
case '$': | |
/* We use a non-printable ASCII character to | |
* delimit the string field values.*/ | |
wrkbuf[offs++] = '\1'; | |
/* We also need the max. field number */ | |
num = 0; | |
while (1) { | |
ch = apattern[++i]; | |
/* Not a number, exit this loop */ | |
if (ch < 48 || ch > 57) { | |
i--; | |
break; | |
} | |
num = (num * 10) + (ch - 48); | |
wrkbuf[offs++] = ch; | |
} | |
if (num > awk->maxfield) | |
awk->maxfield = num; | |
/* Incomplete expression, a $ not followed by a number */ | |
if (wrkbuf[1] == 0) { | |
fprintf(stderr, "Incomplete field descriptor at " | |
"or near character %d in awk pattern\n", i+1); | |
return -1; | |
} | |
break; | |
/* Field separator */ | |
case ',': | |
wrkbuf[offs++] = ' '; | |
break; | |
} | |
/* if wrkbuf has nothing, we've got rubbish. Continue in the hopes | |
* that something else makes sense. */ | |
if (offs == 0) | |
continue; | |
/* End of a field reached, put it into awk->fields */ | |
wrkbuf[offs] = '\0'; | |
awk->fields = | |
(char **)realloc(awk->fields, (awk->numfields + 1) | |
* sizeof(char *)); | |
if (!awk->fields ) { | |
fprintf(stderr, | |
"Memory allocation error (awk->fields) in awkcomp()\n"); | |
return -1; | |
} | |
awk->fields[awk->numfields] = strdup(wrkbuf); | |
if (!awk->fields[awk->numfields]) { | |
fprintf(stderr, | |
"Memory allocation error (awk->fields[%d]) in awkcomp()\n", | |
awk->numfields); | |
return -1; | |
} | |
memset(wrkbuf, 0, strlen(apattern) + 1); | |
awk->numfields++; | |
offs = 0; | |
} | |
free(wrkbuf); | |
if (awk->numfields == 0) { | |
fprintf(stderr, | |
"Unable to parse and compile the pattern; no fields found\n"); | |
return -1; | |
} | |
return 0; | |
} | |
/* awkfree() - free a previously allocated awk_pat structure */ | |
static void awkfree(awk_pat_t *awk ) | |
{ | |
int i; | |
for (i = 0; i < awk->numfields; i++) | |
free(awk->fields[i]); | |
free(awk->fields); | |
} | |
int main(int argc, char **argv) | |
{ | |
char *apattern = NULL, *gpattern = NULL; | |
char **files = NULL; | |
int numfiles = 0, i = 0, c = 0; | |
int ignore_case = 0, no_filename = 0, with_filename = 0, line_number = 0; | |
if (argc < 3) { | |
usage(); | |
return EXIT_FAILURE; | |
} | |
tokdelim = strdup("\t\r\n "); | |
while (1) { | |
int opt_ind = 0; | |
while (c != -1) { | |
c = getopt_long(argc, argv, "wWhinF:", long_options, &opt_ind); | |
switch (c) { | |
case 'w': | |
with_filename = 0; | |
no_filename = 1; | |
break; | |
case 'i': | |
ignore_case = 1; | |
break; | |
case 'W': | |
with_filename = 1; | |
no_filename = 0; | |
break; | |
case 'n': | |
line_number = 1; | |
break; | |
case 'F': | |
tokdelim = realloc(tokdelim, 3 + strlen(optarg) + 1); | |
memset(tokdelim, 0, 3 + strlen( optarg ) + 1); | |
sprintf(tokdelim, "\t\r\n%s", optarg); | |
break; | |
case 'h': | |
usage(); | |
free(tokdelim); | |
return EXIT_SUCCESS; | |
break; | |
case 'v': | |
printf("%s\n", VERSION); | |
free(tokdelim); | |
return EXIT_SUCCESS; | |
break; | |
} | |
} | |
/* Now we'll grab our patterns and files. */ | |
if ((argc - optind) < 2) { | |
usage(); | |
free(tokdelim); | |
return EXIT_FAILURE; | |
} | |
/* pattern one will be our "grep" pattern */ | |
gpattern = strdup(argv[optind]); | |
if (gpattern == NULL) { | |
fprintf(stderr, "Memory allocation error"); | |
exit(EXIT_FAILURE); | |
} | |
optind++; | |
/* pattern two is our "awk" pattern */ | |
apattern = strdup(argv[optind]); | |
if(apattern == NULL) { | |
fprintf(stderr, "Memory allocation error"); | |
exit(EXIT_FAILURE); | |
} | |
optind++; | |
/* Anything that remains is a file or wildcard which should be | |
* expanded by the calling shell. */ | |
if (optind < argc) { | |
numfiles = argc - optind; | |
files = (char **)malloc(sizeof(char *) * (numfiles + 1)); | |
for (i = 0; i < numfiles; i++) { | |
files[i] = strdup(argv[optind + i]); | |
} | |
} | |
/* If the number of files is greater than 1 then we default to | |
* showing the filename unless specifically directed against it.*/ | |
if (numfiles > 1 && no_filename == 0) | |
with_filename = 1; | |
break; | |
} | |
/* Process everything */ | |
regex_t re; | |
int cflags = 0, rc = 0; | |
if (ignore_case) | |
cflags = REG_ICASE; | |
/* compile the regular expression parser */ | |
if (regcomp(&re, gpattern, cflags)) { | |
fprintf(stderr, | |
"Error compiling grep-style pattern [%s]\n", gpattern); | |
return EXIT_FAILURE; | |
} | |
awk_pat_t awk; | |
if (awkcomp(&awk, apattern)) | |
{ | |
fprintf(stderr, | |
"Error compiling awk-style pattern [%s]\n", apattern); | |
return EXIT_FAILURE; | |
} | |
if (numfiles > 0) { | |
if(process_files( | |
numfiles, files, re, awk, with_filename, line_number) == 0) | |
rc = 255; // We'll return 255 if no lines were found. | |
} else { | |
if(process_pipe(re, awk, line_number) == 0) | |
rc = 255; | |
} | |
/* Destructor */ | |
for (i = 0; i < numfiles; i++) { | |
if (files[i]) | |
free(files[i]); | |
} | |
free(files); | |
/* Awk pattern */ | |
free(apattern); | |
/* Grep pattern */ | |
free(gpattern); | |
/* Grep regex */ | |
regfree(&re); | |
/* Awk pattern structure */ | |
awkfree(&awk); | |
/* Token delimiter (might have been freed elsewhere) */ | |
if (tokdelim) | |
free(tokdelim); | |
return rc; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment