Skip to content

Instantly share code, notes, and snippets.

@kholisrag
Forked from elementalvoid/grok-to-regex.py
Last active August 16, 2024 11:16
Show Gist options
  • Save kholisrag/caa737bd460f0f0fe93ce4fe26e3dbd4 to your computer and use it in GitHub Desktop.
Save kholisrag/caa737bd460f0f0fe93ce4fe26e3dbd4 to your computer and use it in GitHub Desktop.
Improved grok-to-regex.py to run with Python 3.12 and to support nested grok patterns
#!/usr/bin/env python
import argparse
import re
from os import walk
from os.path import join
def get_patterns(patterns_dir):
    """Load every grok pattern definition found under *patterns_dir*.

    The directory tree is walked recursively and every file in it is read
    line by line. Each definition line has the form ``NAME <regex>``, with
    the name separated from the regex by any run of whitespace (spaces or
    tabs). Blank lines, comment lines (first non-blank character ``#``) and
    lines that carry a name but no regex are skipped.

    Args:
        patterns_dir: Root directory containing grok pattern files.

    Returns:
        A dict mapping each pattern name to its raw (unexpanded) definition.
        When a name is defined more than once, the definition read last wins.
    """
    patterns = {}
    for dirpath, _, filenames in walk(patterns_dir):
        for name in filenames:
            with open(join(dirpath, name), encoding='utf-8') as f:
                # Iterate the file lazily instead of materialising readlines().
                for line in f:
                    entry = line.rstrip('\n').lstrip()
                    if not entry or entry.startswith('#'):
                        continue
                    # Split on any whitespace run so tab-separated or
                    # column-aligned definitions parse correctly; trailing
                    # whitespace of the regex itself is left untouched.
                    fields = entry.split(None, 1)
                    if len(fields) != 2:
                        # A bare name without a regex is not a usable
                        # definition; skip it rather than crash on unpacking.
                        continue
                    key, value = fields
                    patterns[key] = value
    return patterns
def convert(expression, patterns, patterns_dir=None):
    """Expand a grok expression into a regular expression and print it.

    Every ``%{PATTERN}``, ``%{PATTERN:name}`` or ``%{PATTERN:name:type}``
    reference in *expression* is replaced by the definition of ``PATTERN``
    from *patterns*, expanded recursively. A reference with a name is wrapped
    in a named group written as ``(?<name>...)``; the optional ``type`` part
    is accepted and ignored because it has no regex equivalent.

    References that cannot be expanded (unknown pattern, more than three
    ``:``-separated parts, or a pattern that refers back to itself) are left
    in the output verbatim and listed in a warning printed after the result.

    Args:
        expression: The grok expression to convert.
        patterns: Mapping of pattern name to raw definition.
        patterns_dir: Optional directory name shown in the warning message.
            When omitted, the module-level ``args.patterns_dir`` set by the
            script entry point is used if it exists.

    Returns:
        The expanded expression (the same string that is printed).
    """
    groks = re.compile(r'%\{([^}]*)\}')
    failed_matches = set()

    def replace_groks(expr, active=()):
        # `active` holds the pattern names currently being expanded so that a
        # self-referential definition is reported instead of recursing forever.
        def expand(match):
            inner = match.group(1)
            parts = inner.split(':')
            if len(parts) > 3:
                failed_matches.add(inner)
                return match.group(0)
            patt = parts[0]
            if patt not in patterns or patt in active:
                failed_matches.add(inner)
                return match.group(0)
            # Recursively process the definition: it may reference other groks.
            body = replace_groks(patterns[patt], active + (patt,))
            if len(parts) == 1:
                return body
            return '(?<{}>{})'.format(parts[1], body)

        # One substitution pass touches each reference exactly once, so text
        # produced by an earlier expansion is never rewritten by a later one.
        return groks.sub(expand, expr)

    final_expression = replace_groks(expression)
    print(final_expression)
    if failed_matches:
        if patterns_dir is None:
            # Backward compatibility: the script entry point stores the parsed
            # CLI arguments in a module-level `args`; it is absent when this
            # function is imported and called directly.
            patterns_dir = getattr(globals().get('args'), 'patterns_dir', '<unknown>')
        print('\nWarning! Unable to match the following expressions:')
        # Sorted so the warning is deterministic across runs.
        print(' {}'.format(', '.join(sorted(failed_matches))))
        print('This could be a typo or a missing grok pattern file. Double check your grok patterns directory: {}'.format(
            patterns_dir
        ))
    return final_expression
if __name__ == "__main__":
    # Command-line entry point: one positional grok expression plus an
    # optional directory from which pattern files are loaded.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "expression",
        metavar="expr",
        help="A grok expression.",
    )
    parser.add_argument(
        "-d",
        "--patterns-dir",
        default="patterns",
        dest="patterns_dir",
        help="Directory to find grok patterns.",
    )
    # `args` is deliberately a module-level name: convert() looks it up when
    # it reports references it could not expand.
    args = parser.parse_args()
    patterns = get_patterns(args.patterns_dir)
    # convert() prints the resulting regular expression itself.
    convert(args.expression, patterns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment